AArch64InstrInfo.cpp
1//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the AArch64 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64InstrInfo.h"
14#include "AArch64ExpandImm.h"
17#include "AArch64PointerAuth.h"
18#include "AArch64Subtarget.h"
22#include "llvm/ADT/ArrayRef.h"
23#include "llvm/ADT/STLExtras.h"
41#include "llvm/IR/DebugLoc.h"
42#include "llvm/IR/GlobalValue.h"
43#include "llvm/IR/Module.h"
44#include "llvm/MC/MCAsmInfo.h"
45#include "llvm/MC/MCInst.h"
47#include "llvm/MC/MCInstrDesc.h"
52#include "llvm/Support/LEB128.h"
56#include <cassert>
57#include <cstdint>
58#include <iterator>
59#include <utility>
60
61using namespace llvm;
62
63#define GET_INSTRINFO_CTOR_DTOR
64#include "AArch64GenInstrInfo.inc"
65
67 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
68 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
69
71 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
72 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
73
74static cl::opt<unsigned>
75 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
76 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
77
78static cl::opt<unsigned>
79 BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
80 cl::desc("Restrict range of B instructions (DEBUG)"));
81
82AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
83 : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP,
84 AArch64::CATCHRET),
85 RI(STI.getTargetTriple()), Subtarget(STI) {}
86
87/// GetInstSize - Return the number of bytes of code the specified
88/// instruction may occupy. This returns the maximum number of bytes.
89unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
90 const MachineBasicBlock &MBB = *MI.getParent();
91 const MachineFunction *MF = MBB.getParent();
92 const Function &F = MF->getFunction();
93 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
94
95 {
96 auto Op = MI.getOpcode();
97 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
98 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
99 }
100
101 // Meta-instructions emit no code.
102 if (MI.isMetaInstruction())
103 return 0;
104
105 // FIXME: We currently only handle pseudoinstructions that don't get expanded
106 // before the assembly printer.
107 unsigned NumBytes = 0;
108 const MCInstrDesc &Desc = MI.getDesc();
109
110 // Size should be preferably set in
111 // llvm/lib/Target/AArch64/AArch64InstrInfo.td (default case).
112 // Specific cases handle instructions of variable sizes
113 switch (Desc.getOpcode()) {
114 default:
115 if (Desc.getSize())
116 return Desc.getSize();
117
118 // Anything not explicitly designated otherwise (i.e. pseudo-instructions
119 // with fixed constant size but not specified in .td file) is a normal
120 // 4-byte insn.
121 NumBytes = 4;
122 break;
123 case TargetOpcode::STACKMAP:
124 // The upper bound for a stackmap intrinsic is the full length of its shadow
125 NumBytes = StackMapOpers(&MI).getNumPatchBytes();
126 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
127 break;
128 case TargetOpcode::PATCHPOINT:
129 // The size of the patchpoint intrinsic is the number of bytes requested
130 NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
131 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
132 break;
133 case TargetOpcode::STATEPOINT:
134 NumBytes = StatepointOpers(&MI).getNumPatchBytes();
135 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
136 // No patch bytes means a normal call inst is emitted
137 if (NumBytes == 0)
138 NumBytes = 4;
139 break;
140 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
141 // If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
142 // instructions are expanded to the specified number of NOPs. Otherwise,
143 // they are expanded to 36-byte XRay sleds.
144 NumBytes =
145 F.getFnAttributeAsParsedInteger("patchable-function-entry", 9) * 4;
146 break;
147 case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
148 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
149 // An XRay sled can be 4 bytes of alignment plus a 32-byte block.
150 NumBytes = 36;
151 break;
152 case TargetOpcode::PATCHABLE_EVENT_CALL:
153 // EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
154 NumBytes = 24;
155 break;
156
157 case AArch64::SPACE:
158 NumBytes = MI.getOperand(1).getImm();
159 break;
160 case TargetOpcode::BUNDLE:
161 NumBytes = getInstBundleLength(MI);
162 break;
163 }
164
165 return NumBytes;
166}
167
168unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
169 unsigned Size = 0;
170 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
171 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
172 while (++I != E && I->isInsideBundle()) {
173 assert(!I->isBundle() && "No nested bundle!");
174 Size += getInstSizeInBytes(*I);
175 }
176 return Size;
177}
178
179static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
180 SmallVectorImpl<MachineOperand> &Cond) {
181 // Block ends with fall-through condbranch.
182 switch (LastInst->getOpcode()) {
183 default:
184 llvm_unreachable("Unknown branch instruction?");
185 case AArch64::Bcc:
186 Target = LastInst->getOperand(1).getMBB();
187 Cond.push_back(LastInst->getOperand(0));
188 break;
189 case AArch64::CBZW:
190 case AArch64::CBZX:
191 case AArch64::CBNZW:
192 case AArch64::CBNZX:
193 Target = LastInst->getOperand(1).getMBB();
194 Cond.push_back(MachineOperand::CreateImm(-1));
195 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
196 Cond.push_back(LastInst->getOperand(0));
197 break;
198 case AArch64::TBZW:
199 case AArch64::TBZX:
200 case AArch64::TBNZW:
201 case AArch64::TBNZX:
202 Target = LastInst->getOperand(2).getMBB();
203 Cond.push_back(MachineOperand::CreateImm(-1));
204 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
205 Cond.push_back(LastInst->getOperand(0));
206 Cond.push_back(LastInst->getOperand(1));
207 }
208}
209
210static unsigned getBranchDisplacementBits(unsigned Opc) {
211 switch (Opc) {
212 default:
213 llvm_unreachable("unexpected opcode!");
214 case AArch64::B:
215 return BDisplacementBits;
216 case AArch64::TBNZW:
217 case AArch64::TBZW:
218 case AArch64::TBNZX:
219 case AArch64::TBZX:
220 return TBZDisplacementBits;
221 case AArch64::CBNZW:
222 case AArch64::CBZW:
223 case AArch64::CBNZX:
224 case AArch64::CBZX:
225 return CBZDisplacementBits;
226 case AArch64::Bcc:
227 return BCCDisplacementBits;
228 }
229}
230
231bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
232 int64_t BrOffset) const {
233 unsigned Bits = getBranchDisplacementBits(BranchOp);
234 assert(Bits >= 3 && "max branch displacement must be enough to jump "
235 "over conditional branch expansion");
236 return isIntN(Bits, BrOffset / 4);
237}
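// For illustration: branch offsets are encoded as signed multiples of 4 bytes,
// so with the default settings above a Bcc or CB[N]Z (19 bits) reaches roughly
// +/-1 MiB, a TB[N]Z (14 bits) roughly +/-32 KiB, and an unconditional B
// (26 bits) roughly +/-128 MiB from the branch itself.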
238
239MachineBasicBlock *
240AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
241 switch (MI.getOpcode()) {
242 default:
243 llvm_unreachable("unexpected opcode!");
244 case AArch64::B:
245 return MI.getOperand(0).getMBB();
246 case AArch64::TBZW:
247 case AArch64::TBNZW:
248 case AArch64::TBZX:
249 case AArch64::TBNZX:
250 return MI.getOperand(2).getMBB();
251 case AArch64::CBZW:
252 case AArch64::CBNZW:
253 case AArch64::CBZX:
254 case AArch64::CBNZX:
255 case AArch64::Bcc:
256 return MI.getOperand(1).getMBB();
257 }
258}
259
260void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
261 MachineBasicBlock &NewDestBB,
262 MachineBasicBlock &RestoreBB,
263 const DebugLoc &DL,
264 int64_t BrOffset,
265 RegScavenger *RS) const {
266 assert(RS && "RegScavenger required for long branching");
267 assert(MBB.empty() &&
268 "new block should be inserted for expanding unconditional branch");
269 assert(MBB.pred_size() == 1);
270 assert(RestoreBB.empty() &&
271 "restore block should be inserted for restoring clobbered registers");
272
273 auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
274 // Offsets outside of the signed 33-bit range are not supported for ADRP +
275 // ADD.
276 if (!isInt<33>(BrOffset))
278 "Branch offsets outside of the signed 33-bit range not supported");
279
280 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg)
281 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGE);
282 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg)
283 .addReg(Reg)
284 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
285 .addImm(0);
286 BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg);
287 };
288
290 // If X16 is unused, we can rely on the linker to insert a range extension
291 // thunk if NewDestBB is out of range of a single B instruction.
292 constexpr Register Reg = AArch64::X16;
293 if (!RS->isRegUsed(Reg)) {
294 insertUnconditionalBranch(MBB, &NewDestBB, DL);
295 RS->setRegUsed(Reg);
296 return;
297 }
298
299 // If there's a free register and it's worth inflating the code size,
300 // manually insert the indirect branch.
301 Register Scavenged = RS->FindUnusedReg(&AArch64::GPR64RegClass);
302 if (Scavenged != AArch64::NoRegister &&
303 MBB.getSectionID() == MBBSectionID::ColdSectionID) {
304 buildIndirectBranch(Scavenged, NewDestBB);
305 RS->setRegUsed(Scavenged);
306 return;
307 }
308
309 // Note: Spilling X16 briefly moves the stack pointer, making it incompatible
310 // with red zones.
311 AArch64FunctionInfo *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
312 if (!AFI || AFI->hasRedZone().value_or(true))
313 report_fatal_error(
314 "Unable to insert indirect branch inside function that has red zone");
315
316 // Otherwise, spill X16 and defer range extension to the linker.
317 BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre))
318 .addReg(AArch64::SP, RegState::Define)
319 .addReg(Reg)
320 .addReg(AArch64::SP)
321 .addImm(-16);
322
323 BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB);
324
325 BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost))
326 .addReg(AArch64::SP, RegState::Define)
327 .addReg(Reg, RegState::Define)
328 .addReg(AArch64::SP)
329 .addImm(16);
330}
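// Rough shape of the spill path built above when X16 is live (offsets and
// labels illustrative, not taken from the original source):
//   str  x16, [sp, #-16]!   ; STRXpre: spill X16, temporarily moving SP
//   b    <RestoreBB>        ; may be range-extended by a linker thunk via X16
// RestoreBB:
//   ldr  x16, [sp], #16     ; LDRXpost: reload X16 and restore SP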
331
332// Branch analysis.
333bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
334 MachineBasicBlock *&TBB,
335 MachineBasicBlock *&FBB,
336 SmallVectorImpl<MachineOperand> &Cond,
337 bool AllowModify) const {
338 // If the block has no terminators, it just falls into the block after it.
339 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
340 if (I == MBB.end())
341 return false;
342
343 // Skip over SpeculationBarrierEndBB terminators
344 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
345 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
346 --I;
347 }
348
349 if (!isUnpredicatedTerminator(*I))
350 return false;
351
352 // Get the last instruction in the block.
353 MachineInstr *LastInst = &*I;
354
355 // If there is only one terminator instruction, process it.
356 unsigned LastOpc = LastInst->getOpcode();
357 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
358 if (isUncondBranchOpcode(LastOpc)) {
359 TBB = LastInst->getOperand(0).getMBB();
360 return false;
361 }
362 if (isCondBranchOpcode(LastOpc)) {
363 // Block ends with fall-through condbranch.
364 parseCondBranch(LastInst, TBB, Cond);
365 return false;
366 }
367 return true; // Can't handle indirect branch.
368 }
369
370 // Get the instruction before it if it is a terminator.
371 MachineInstr *SecondLastInst = &*I;
372 unsigned SecondLastOpc = SecondLastInst->getOpcode();
373
374 // If AllowModify is true and the block ends with two or more unconditional
375 // branches, delete all but the first unconditional branch.
376 if (AllowModify && isUncondBranchOpcode(LastOpc)) {
377 while (isUncondBranchOpcode(SecondLastOpc)) {
378 LastInst->eraseFromParent();
379 LastInst = SecondLastInst;
380 LastOpc = LastInst->getOpcode();
381 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
382 // Return now; the only terminator is an unconditional branch.
383 TBB = LastInst->getOperand(0).getMBB();
384 return false;
385 }
386 SecondLastInst = &*I;
387 SecondLastOpc = SecondLastInst->getOpcode();
388 }
389 }
390
391 // If we're allowed to modify and the block ends in an unconditional branch
392 // which could simply fallthrough, remove the branch. (Note: This case only
393 // matters when we can't understand the whole sequence, otherwise it's also
394 // handled by BranchFolding.cpp.)
395 if (AllowModify && isUncondBranchOpcode(LastOpc) &&
396 MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
397 LastInst->eraseFromParent();
398 LastInst = SecondLastInst;
399 LastOpc = LastInst->getOpcode();
400 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
401 assert(!isUncondBranchOpcode(LastOpc) &&
402 "unreachable unconditional branches removed above");
403
404 if (isCondBranchOpcode(LastOpc)) {
405 // Block ends with fall-through condbranch.
406 parseCondBranch(LastInst, TBB, Cond);
407 return false;
408 }
409 return true; // Can't handle indirect branch.
410 }
411 SecondLastInst = &*I;
412 SecondLastOpc = SecondLastInst->getOpcode();
413 }
414
415 // If there are three terminators, we don't know what sort of block this is.
416 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
417 return true;
418
419 // If the block ends with a B and a Bcc, handle it.
420 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
421 parseCondBranch(SecondLastInst, TBB, Cond);
422 FBB = LastInst->getOperand(0).getMBB();
423 return false;
424 }
425
426 // If the block ends with two unconditional branches, handle it. The second
427 // one is not executed, so remove it.
428 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
429 TBB = SecondLastInst->getOperand(0).getMBB();
430 I = LastInst;
431 if (AllowModify)
432 I->eraseFromParent();
433 return false;
434 }
435
436 // ...likewise if it ends with an indirect branch followed by an unconditional
437 // branch.
438 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
439 I = LastInst;
440 if (AllowModify)
441 I->eraseFromParent();
442 return true;
443 }
444
445 // Otherwise, can't handle this.
446 return true;
447}
448
449bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
450 MachineBranchPredicate &MBP,
451 bool AllowModify) const {
452 // For the moment, handle only a block which ends with a cb(n)zx followed by
453 // a fallthrough. Why this? Because it is a common form.
454 // TODO: Should we handle b.cc?
455
456 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
457 if (I == MBB.end())
458 return true;
459
460 // Skip over SpeculationBarrierEndBB terminators
461 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
462 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
463 --I;
464 }
465
466 if (!isUnpredicatedTerminator(*I))
467 return true;
468
469 // Get the last instruction in the block.
470 MachineInstr *LastInst = &*I;
471 unsigned LastOpc = LastInst->getOpcode();
472 if (!isCondBranchOpcode(LastOpc))
473 return true;
474
475 switch (LastOpc) {
476 default:
477 return true;
478 case AArch64::CBZW:
479 case AArch64::CBZX:
480 case AArch64::CBNZW:
481 case AArch64::CBNZX:
482 break;
483 };
484
485 MBP.TrueDest = LastInst->getOperand(1).getMBB();
486 assert(MBP.TrueDest && "expected!");
487 MBP.FalseDest = MBB.getNextNode();
488
489 MBP.ConditionDef = nullptr;
490 MBP.SingleUseCondition = false;
491
492 MBP.LHS = LastInst->getOperand(0);
493 MBP.RHS = MachineOperand::CreateImm(0);
494 MBP.Predicate = LastOpc == AArch64::CBNZX ? MachineBranchPredicate::PRED_NE
495 : MachineBranchPredicate::PRED_EQ;
496 return false;
497}
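// Example of the only shape recognized above (register and block names
// hypothetical):
//   cbnz x8, %bb.target      ; MBP.TrueDest = %bb.target, Predicate = PRED_NE
//   ; fall through           ; MBP.FalseDest = next block, LHS = x8, RHS = #0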
498
499bool AArch64InstrInfo::reverseBranchCondition(
500 SmallVectorImpl<MachineOperand> &Cond) const {
501 if (Cond[0].getImm() != -1) {
502 // Regular Bcc
503 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
504 Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
505 } else {
506 // Folded compare-and-branch
507 switch (Cond[1].getImm()) {
508 default:
509 llvm_unreachable("Unknown conditional branch!");
510 case AArch64::CBZW:
511 Cond[1].setImm(AArch64::CBNZW);
512 break;
513 case AArch64::CBNZW:
514 Cond[1].setImm(AArch64::CBZW);
515 break;
516 case AArch64::CBZX:
517 Cond[1].setImm(AArch64::CBNZX);
518 break;
519 case AArch64::CBNZX:
520 Cond[1].setImm(AArch64::CBZX);
521 break;
522 case AArch64::TBZW:
523 Cond[1].setImm(AArch64::TBNZW);
524 break;
525 case AArch64::TBNZW:
526 Cond[1].setImm(AArch64::TBZW);
527 break;
528 case AArch64::TBZX:
529 Cond[1].setImm(AArch64::TBNZX);
530 break;
531 case AArch64::TBNZX:
532 Cond[1].setImm(AArch64::TBZX);
533 break;
534 }
535 }
536
537 return false;
538}
539
540unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
541 int *BytesRemoved) const {
542 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
543 if (I == MBB.end())
544 return 0;
545
546 if (!isUncondBranchOpcode(I->getOpcode()) &&
547 !isCondBranchOpcode(I->getOpcode()))
548 return 0;
549
550 // Remove the branch.
551 I->eraseFromParent();
552
553 I = MBB.end();
554
555 if (I == MBB.begin()) {
556 if (BytesRemoved)
557 *BytesRemoved = 4;
558 return 1;
559 }
560 --I;
561 if (!isCondBranchOpcode(I->getOpcode())) {
562 if (BytesRemoved)
563 *BytesRemoved = 4;
564 return 1;
565 }
566
567 // Remove the branch.
568 I->eraseFromParent();
569 if (BytesRemoved)
570 *BytesRemoved = 8;
571
572 return 2;
573}
574
575void AArch64InstrInfo::instantiateCondBranch(
576 MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
577 ArrayRef<MachineOperand> Cond) const {
578 if (Cond[0].getImm() != -1) {
579 // Regular Bcc
580 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
581 } else {
582 // Folded compare-and-branch
583 // Note that we use addOperand instead of addReg to keep the flags.
584 const MachineInstrBuilder MIB =
585 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
586 if (Cond.size() > 3)
587 MIB.addImm(Cond[3].getImm());
588 MIB.addMBB(TBB);
589 }
590}
591
592unsigned AArch64InstrInfo::insertBranch(
593 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
594 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
595 // Shouldn't be a fall through.
596 assert(TBB && "insertBranch must not be told to insert a fallthrough");
597
598 if (!FBB) {
599 if (Cond.empty()) // Unconditional branch?
600 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
601 else
602 instantiateCondBranch(MBB, DL, TBB, Cond);
603
604 if (BytesAdded)
605 *BytesAdded = 4;
606
607 return 1;
608 }
609
610 // Two-way conditional branch.
611 instantiateCondBranch(MBB, DL, TBB, Cond);
612 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
613
614 if (BytesAdded)
615 *BytesAdded = 8;
616
617 return 2;
618}
619
620// Find the original register that VReg is copied from.
621static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
622 while (Register::isVirtualRegister(VReg)) {
623 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
624 if (!DefMI->isFullCopy())
625 return VReg;
626 VReg = DefMI->getOperand(1).getReg();
627 }
628 return VReg;
629}
630
631// Determine if VReg is defined by an instruction that can be folded into a
632// csel instruction. If so, return the folded opcode, and the replacement
633// register.
634static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
635 unsigned *NewVReg = nullptr) {
636 VReg = removeCopies(MRI, VReg);
637 if (!Register::isVirtualRegister(VReg))
638 return 0;
639
640 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
641 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
642 unsigned Opc = 0;
643 unsigned SrcOpNum = 0;
644 switch (DefMI->getOpcode()) {
645 case AArch64::ADDSXri:
646 case AArch64::ADDSWri:
647 // if NZCV is used, do not fold.
648 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
649 true) == -1)
650 return 0;
651 // fall-through to ADDXri and ADDWri.
652 [[fallthrough]];
653 case AArch64::ADDXri:
654 case AArch64::ADDWri:
655 // add x, 1 -> csinc.
656 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
657 DefMI->getOperand(3).getImm() != 0)
658 return 0;
659 SrcOpNum = 1;
660 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
661 break;
662
663 case AArch64::ORNXrr:
664 case AArch64::ORNWrr: {
665 // not x -> csinv, represented as orn dst, xzr, src.
666 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
667 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
668 return 0;
669 SrcOpNum = 2;
670 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
671 break;
672 }
673
674 case AArch64::SUBSXrr:
675 case AArch64::SUBSWrr:
676 // if NZCV is used, do not fold.
677 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
678 true) == -1)
679 return 0;
680 // fall-through to SUBXrr and SUBWrr.
681 [[fallthrough]];
682 case AArch64::SUBXrr:
683 case AArch64::SUBWrr: {
684 // neg x -> csneg, represented as sub dst, xzr, src.
685 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
686 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
687 return 0;
688 SrcOpNum = 2;
689 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
690 break;
691 }
692 default:
693 return 0;
694 }
695 assert(Opc && SrcOpNum && "Missing parameters");
696
697 if (NewVReg)
698 *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
699 return Opc;
700}
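// Illustrative fold enabled by the helper above (virtual register names made
// up): given
//   %t = ADDWri %x, 1, 0        ; %t = %x + 1
//   %d = CSELWr %t, %f, <cc>
// the caller can invert <cc> and emit
//   %d = CSINCWr %f, %x, <!cc>  ; picks %f, or %x + 1 on the other path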
701
702bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
703 ArrayRef<MachineOperand> Cond,
704 Register DstReg, Register TrueReg,
705 Register FalseReg, int &CondCycles,
706 int &TrueCycles,
707 int &FalseCycles) const {
708 // Check register classes.
709 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
710 const TargetRegisterClass *RC =
711 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
712 if (!RC)
713 return false;
714
715 // Also need to check the dest regclass, in case we're trying to optimize
716 // something like:
717 // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
718 if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
719 return false;
720
721 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
722 unsigned ExtraCondLat = Cond.size() != 1;
723
724 // GPRs are handled by csel.
725 // FIXME: Fold in x+1, -x, and ~x when applicable.
726 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
727 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
728 // Single-cycle csel, csinc, csinv, and csneg.
729 CondCycles = 1 + ExtraCondLat;
730 TrueCycles = FalseCycles = 1;
731 if (canFoldIntoCSel(MRI, TrueReg))
732 TrueCycles = 0;
733 else if (canFoldIntoCSel(MRI, FalseReg))
734 FalseCycles = 0;
735 return true;
736 }
737
738 // Scalar floating point is handled by fcsel.
739 // FIXME: Form fabs, fmin, and fmax when applicable.
740 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
741 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
742 CondCycles = 5 + ExtraCondLat;
743 TrueCycles = FalseCycles = 2;
744 return true;
745 }
746
747 // Can't do vectors.
748 return false;
749}
750
751void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
752 MachineBasicBlock::iterator I,
753 const DebugLoc &DL, Register DstReg,
754 ArrayRef<MachineOperand> Cond,
755 Register TrueReg, Register FalseReg) const {
756 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
757
758 // Parse the condition code, see parseCondBranch() above.
759 AArch64CC::CondCode CC;
760 switch (Cond.size()) {
761 default:
762 llvm_unreachable("Unknown condition opcode in Cond");
763 case 1: // b.cc
764 CC = AArch64CC::CondCode(Cond[0].getImm());
765 break;
766 case 3: { // cbz/cbnz
767 // We must insert a compare against 0.
768 bool Is64Bit;
769 switch (Cond[1].getImm()) {
770 default:
771 llvm_unreachable("Unknown branch opcode in Cond");
772 case AArch64::CBZW:
773 Is64Bit = false;
774 CC = AArch64CC::EQ;
775 break;
776 case AArch64::CBZX:
777 Is64Bit = true;
778 CC = AArch64CC::EQ;
779 break;
780 case AArch64::CBNZW:
781 Is64Bit = false;
782 CC = AArch64CC::NE;
783 break;
784 case AArch64::CBNZX:
785 Is64Bit = true;
786 CC = AArch64CC::NE;
787 break;
788 }
789 Register SrcReg = Cond[2].getReg();
790 if (Is64Bit) {
791 // cmp reg, #0 is actually subs xzr, reg, #0.
792 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
793 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
794 .addReg(SrcReg)
795 .addImm(0)
796 .addImm(0);
797 } else {
798 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
799 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
800 .addReg(SrcReg)
801 .addImm(0)
802 .addImm(0);
803 }
804 break;
805 }
806 case 4: { // tbz/tbnz
807 // We must insert a tst instruction.
808 switch (Cond[1].getImm()) {
809 default:
810 llvm_unreachable("Unknown branch opcode in Cond");
811 case AArch64::TBZW:
812 case AArch64::TBZX:
813 CC = AArch64CC::EQ;
814 break;
815 case AArch64::TBNZW:
816 case AArch64::TBNZX:
817 CC = AArch64CC::NE;
818 break;
819 }
820 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
821 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
822 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
823 .addReg(Cond[2].getReg())
824 .addImm(
825 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
826 else
827 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
828 .addReg(Cond[2].getReg())
829 .addImm(
830 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
831 break;
832 }
833 }
834
835 unsigned Opc = 0;
836 const TargetRegisterClass *RC = nullptr;
837 bool TryFold = false;
838 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
839 RC = &AArch64::GPR64RegClass;
840 Opc = AArch64::CSELXr;
841 TryFold = true;
842 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
843 RC = &AArch64::GPR32RegClass;
844 Opc = AArch64::CSELWr;
845 TryFold = true;
846 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
847 RC = &AArch64::FPR64RegClass;
848 Opc = AArch64::FCSELDrrr;
849 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
850 RC = &AArch64::FPR32RegClass;
851 Opc = AArch64::FCSELSrrr;
852 }
853 assert(RC && "Unsupported regclass");
854
855 // Try folding simple instructions into the csel.
856 if (TryFold) {
857 unsigned NewVReg = 0;
858 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
859 if (FoldedOpc) {
860 // The folded opcodes csinc, csinv and csneg apply the operation to
861 // FalseReg, so we need to invert the condition.
862 CC = AArch64CC::getInvertedCondCode(CC);
863 TrueReg = FalseReg;
864 } else
865 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
866
867 // Fold the operation. Leave any dead instructions for DCE to clean up.
868 if (FoldedOpc) {
869 FalseReg = NewVReg;
870 Opc = FoldedOpc;
871 // This extends the live range of NewVReg.
872 MRI.clearKillFlags(NewVReg);
873 }
874 }
875
876 // Pull all virtual registers into the appropriate class.
877 MRI.constrainRegClass(TrueReg, RC);
878 MRI.constrainRegClass(FalseReg, RC);
879
880 // Insert the csel.
881 BuildMI(MBB, I, DL, get(Opc), DstReg)
882 .addReg(TrueReg)
883 .addReg(FalseReg)
884 .addImm(CC);
885}
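// For example, a select whose condition came from "cbz w0, ..." is lowered
// above to (destination and source registers hypothetical):
//   subs wzr, w0, #0              ; materialize the compare against zero
//   csel w1, wTrue, wFalse, eq    ; w1 = (w0 == 0) ? wTrue : wFalse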
886
887// Return true if Imm can be loaded into a register by a "cheap" sequence of
888// instructions. For now, "cheap" means at most two instructions.
889static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
890 if (BitSize == 32)
891 return true;
892
893 assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
894 uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm());
895 SmallVector<AArch64_IMM::ImmInsnModel, 4> Is;
896 AArch64_IMM::expandMOVImm(Imm, BitSize, Is);
897
898 return Is.size() <= 2;
899}
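// For example, a repeating pattern such as 0xffff0000ffff0000 is a valid
// logical immediate and expands to a single ORR, so it counts as cheap, while
// an arbitrary constant like 0x123456789abcdef0 needs a MOVZ plus several
// MOVKs and does not.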
900
901// FIXME: this implementation should be micro-architecture dependent, so a
902// micro-architecture target hook should be introduced here in future.
903bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
904 if (Subtarget.hasExynosCheapAsMoveHandling()) {
905 if (isExynosCheapAsMove(MI))
906 return true;
907 return MI.isAsCheapAsAMove();
908 }
909
910 switch (MI.getOpcode()) {
911 default:
912 return MI.isAsCheapAsAMove();
913
914 case AArch64::ADDWrs:
915 case AArch64::ADDXrs:
916 case AArch64::SUBWrs:
917 case AArch64::SUBXrs:
918 return Subtarget.hasALULSLFast() && MI.getOperand(3).getImm() <= 4;
919
920 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
921 // ORRXri, it is as cheap as MOV.
922 // Likewise if it can be expanded to MOVZ/MOVN/MOVK.
923 case AArch64::MOVi32imm:
924 return isCheapImmediate(MI, 32);
925 case AArch64::MOVi64imm:
926 return isCheapImmediate(MI, 64);
927 }
928}
929
930bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
931 switch (MI.getOpcode()) {
932 default:
933 return false;
934
935 case AArch64::ADDWrs:
936 case AArch64::ADDXrs:
937 case AArch64::ADDSWrs:
938 case AArch64::ADDSXrs: {
939 unsigned Imm = MI.getOperand(3).getImm();
940 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
941 if (ShiftVal == 0)
942 return true;
943 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
944 }
945
946 case AArch64::ADDWrx:
947 case AArch64::ADDXrx:
948 case AArch64::ADDXrx64:
949 case AArch64::ADDSWrx:
950 case AArch64::ADDSXrx:
951 case AArch64::ADDSXrx64: {
952 unsigned Imm = MI.getOperand(3).getImm();
953 switch (AArch64_AM::getArithExtendType(Imm)) {
954 default:
955 return false;
956 case AArch64_AM::UXTB:
957 case AArch64_AM::UXTH:
958 case AArch64_AM::UXTW:
959 case AArch64_AM::UXTX:
960 return AArch64_AM::getArithShiftValue(Imm) <= 4;
961 }
962 }
963
964 case AArch64::SUBWrs:
965 case AArch64::SUBSWrs: {
966 unsigned Imm = MI.getOperand(3).getImm();
967 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
968 return ShiftVal == 0 ||
969 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
970 }
971
972 case AArch64::SUBXrs:
973 case AArch64::SUBSXrs: {
974 unsigned Imm = MI.getOperand(3).getImm();
975 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
976 return ShiftVal == 0 ||
977 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
978 }
979
980 case AArch64::SUBWrx:
981 case AArch64::SUBXrx:
982 case AArch64::SUBXrx64:
983 case AArch64::SUBSWrx:
984 case AArch64::SUBSXrx:
985 case AArch64::SUBSXrx64: {
986 unsigned Imm = MI.getOperand(3).getImm();
987 switch (AArch64_AM::getArithExtendType(Imm)) {
988 default:
989 return false;
990 case AArch64_AM::UXTB:
991 case AArch64_AM::UXTH:
992 case AArch64_AM::UXTW:
993 case AArch64_AM::UXTX:
994 return AArch64_AM::getArithShiftValue(Imm) == 0;
995 }
996 }
997
998 case AArch64::LDRBBroW:
999 case AArch64::LDRBBroX:
1000 case AArch64::LDRBroW:
1001 case AArch64::LDRBroX:
1002 case AArch64::LDRDroW:
1003 case AArch64::LDRDroX:
1004 case AArch64::LDRHHroW:
1005 case AArch64::LDRHHroX:
1006 case AArch64::LDRHroW:
1007 case AArch64::LDRHroX:
1008 case AArch64::LDRQroW:
1009 case AArch64::LDRQroX:
1010 case AArch64::LDRSBWroW:
1011 case AArch64::LDRSBWroX:
1012 case AArch64::LDRSBXroW:
1013 case AArch64::LDRSBXroX:
1014 case AArch64::LDRSHWroW:
1015 case AArch64::LDRSHWroX:
1016 case AArch64::LDRSHXroW:
1017 case AArch64::LDRSHXroX:
1018 case AArch64::LDRSWroW:
1019 case AArch64::LDRSWroX:
1020 case AArch64::LDRSroW:
1021 case AArch64::LDRSroX:
1022 case AArch64::LDRWroW:
1023 case AArch64::LDRWroX:
1024 case AArch64::LDRXroW:
1025 case AArch64::LDRXroX:
1026 case AArch64::PRFMroW:
1027 case AArch64::PRFMroX:
1028 case AArch64::STRBBroW:
1029 case AArch64::STRBBroX:
1030 case AArch64::STRBroW:
1031 case AArch64::STRBroX:
1032 case AArch64::STRDroW:
1033 case AArch64::STRDroX:
1034 case AArch64::STRHHroW:
1035 case AArch64::STRHHroX:
1036 case AArch64::STRHroW:
1037 case AArch64::STRHroX:
1038 case AArch64::STRQroW:
1039 case AArch64::STRQroX:
1040 case AArch64::STRSroW:
1041 case AArch64::STRSroX:
1042 case AArch64::STRWroW:
1043 case AArch64::STRWroX:
1044 case AArch64::STRXroW:
1045 case AArch64::STRXroX: {
1046 unsigned IsSigned = MI.getOperand(3).getImm();
1047 return !IsSigned;
1048 }
1049 }
1050}
1051
1052bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
1053 unsigned Opc = MI.getOpcode();
1054 switch (Opc) {
1055 default:
1056 return false;
1057 case AArch64::SEH_StackAlloc:
1058 case AArch64::SEH_SaveFPLR:
1059 case AArch64::SEH_SaveFPLR_X:
1060 case AArch64::SEH_SaveReg:
1061 case AArch64::SEH_SaveReg_X:
1062 case AArch64::SEH_SaveRegP:
1063 case AArch64::SEH_SaveRegP_X:
1064 case AArch64::SEH_SaveFReg:
1065 case AArch64::SEH_SaveFReg_X:
1066 case AArch64::SEH_SaveFRegP:
1067 case AArch64::SEH_SaveFRegP_X:
1068 case AArch64::SEH_SetFP:
1069 case AArch64::SEH_AddFP:
1070 case AArch64::SEH_Nop:
1071 case AArch64::SEH_PrologEnd:
1072 case AArch64::SEH_EpilogStart:
1073 case AArch64::SEH_EpilogEnd:
1074 case AArch64::SEH_PACSignLR:
1075 case AArch64::SEH_SaveAnyRegQP:
1076 case AArch64::SEH_SaveAnyRegQPX:
1077 return true;
1078 }
1079}
1080
1081bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
1082 Register &SrcReg, Register &DstReg,
1083 unsigned &SubIdx) const {
1084 switch (MI.getOpcode()) {
1085 default:
1086 return false;
1087 case AArch64::SBFMXri: // aka sxtw
1088 case AArch64::UBFMXri: // aka uxtw
1089 // Check for the 32 -> 64 bit extension case, these instructions can do
1090 // much more.
1091 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1092 return false;
1093 // This is a signed or unsigned 32 -> 64 bit extension.
1094 SrcReg = MI.getOperand(1).getReg();
1095 DstReg = MI.getOperand(0).getReg();
1096 SubIdx = AArch64::sub_32;
1097 return true;
1098 }
1099}
1100
1101bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
1102 const MachineInstr &MIa, const MachineInstr &MIb) const {
1103 const TargetRegisterInfo *TRI = &getRegisterInfo();
1104 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
1105 int64_t OffsetA = 0, OffsetB = 0;
1106 TypeSize WidthA(0, false), WidthB(0, false);
1107 bool OffsetAIsScalable = false, OffsetBIsScalable = false;
1108
1109 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1110 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1111
1112 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1113 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1114 return false;
1115
1116 // Retrieve the base, offset from the base and width. Width
1117 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
1118 // the bases are identical, and the offset of a lower memory access +
1119 // the width doesn't overlap the offset of a higher memory access,
1120 // then the memory accesses are different.
1121 // If OffsetAIsScalable and OffsetBIsScalable are both true, they
1122 // are assumed to have the same scale (vscale).
1123 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
1124 WidthA, TRI) &&
1125 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
1126 WidthB, TRI)) {
1127 if (BaseOpA->isIdenticalTo(*BaseOpB) &&
1128 OffsetAIsScalable == OffsetBIsScalable) {
1129 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1130 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1131 TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1132 if (LowWidth.isScalable() == OffsetAIsScalable &&
1133 LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
1134 return true;
1135 }
1136 }
1137 return false;
1138}
1139
1140bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1141 const MachineBasicBlock *MBB,
1142 const MachineFunction &MF) const {
1143 if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
1144 return true;
1145
1146 // Do not move an instruction that can be recognized as a branch target.
1147 if (hasBTISemantics(MI))
1148 return true;
1149
1150 switch (MI.getOpcode()) {
1151 case AArch64::HINT:
1152 // CSDB hints are scheduling barriers.
1153 if (MI.getOperand(0).getImm() == 0x14)
1154 return true;
1155 break;
1156 case AArch64::DSB:
1157 case AArch64::ISB:
1158 // DSB and ISB also are scheduling barriers.
1159 return true;
1160 case AArch64::MSRpstatesvcrImm1:
1161 // SMSTART and SMSTOP are also scheduling barriers.
1162 return true;
1163 default:;
1164 }
1165 if (isSEHInstruction(MI))
1166 return true;
1167 auto Next = std::next(MI.getIterator());
1168 return Next != MBB->end() && Next->isCFIInstruction();
1169}
1170
1171/// analyzeCompare - For a comparison instruction, return the source registers
1172/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1173/// Return true if the comparison instruction can be analyzed.
1174bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1175 Register &SrcReg2, int64_t &CmpMask,
1176 int64_t &CmpValue) const {
1177 // The first operand can be a frame index where we'd normally expect a
1178 // register.
1179 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1180 if (!MI.getOperand(1).isReg())
1181 return false;
1182
1183 switch (MI.getOpcode()) {
1184 default:
1185 break;
1186 case AArch64::PTEST_PP:
1187 case AArch64::PTEST_PP_ANY:
1188 SrcReg = MI.getOperand(0).getReg();
1189 SrcReg2 = MI.getOperand(1).getReg();
1190 // Not sure about the mask and value for now...
1191 CmpMask = ~0;
1192 CmpValue = 0;
1193 return true;
1194 case AArch64::SUBSWrr:
1195 case AArch64::SUBSWrs:
1196 case AArch64::SUBSWrx:
1197 case AArch64::SUBSXrr:
1198 case AArch64::SUBSXrs:
1199 case AArch64::SUBSXrx:
1200 case AArch64::ADDSWrr:
1201 case AArch64::ADDSWrs:
1202 case AArch64::ADDSWrx:
1203 case AArch64::ADDSXrr:
1204 case AArch64::ADDSXrs:
1205 case AArch64::ADDSXrx:
1206 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1207 SrcReg = MI.getOperand(1).getReg();
1208 SrcReg2 = MI.getOperand(2).getReg();
1209 CmpMask = ~0;
1210 CmpValue = 0;
1211 return true;
1212 case AArch64::SUBSWri:
1213 case AArch64::ADDSWri:
1214 case AArch64::SUBSXri:
1215 case AArch64::ADDSXri:
1216 SrcReg = MI.getOperand(1).getReg();
1217 SrcReg2 = 0;
1218 CmpMask = ~0;
1219 CmpValue = MI.getOperand(2).getImm();
1220 return true;
1221 case AArch64::ANDSWri:
1222 case AArch64::ANDSXri:
1223 // ANDS does not use the same encoding scheme as the other xxxS
1224 // instructions.
1225 SrcReg = MI.getOperand(1).getReg();
1226 SrcReg2 = 0;
1227 CmpMask = ~0;
1228 CmpValue = AArch64_AM::decodeLogicalImmediate(
1229 MI.getOperand(2).getImm(),
1230 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
1231 return true;
1232 }
1233
1234 return false;
1235}
1236
1237static bool UpdateOperandRegClass(MachineInstr &Instr) {
1238 MachineBasicBlock *MBB = Instr.getParent();
1239 assert(MBB && "Can't get MachineBasicBlock here");
1240 MachineFunction *MF = MBB->getParent();
1241 assert(MF && "Can't get MachineFunction here");
1242 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1243 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1244 MachineRegisterInfo *MRI = &MF->getRegInfo();
1245
1246 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1247 ++OpIdx) {
1248 MachineOperand &MO = Instr.getOperand(OpIdx);
1249 const TargetRegisterClass *OpRegCstraints =
1250 Instr.getRegClassConstraint(OpIdx, TII, TRI);
1251
1252 // If there's no constraint, there's nothing to do.
1253 if (!OpRegCstraints)
1254 continue;
1255 // If the operand is a frame index, there's nothing to do here.
1256 // A frame index operand will resolve correctly during PEI.
1257 if (MO.isFI())
1258 continue;
1259
1260 assert(MO.isReg() &&
1261 "Operand has register constraints without being a register!");
1262
1263 Register Reg = MO.getReg();
1264 if (Reg.isPhysical()) {
1265 if (!OpRegCstraints->contains(Reg))
1266 return false;
1267 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1268 !MRI->constrainRegClass(Reg, OpRegCstraints))
1269 return false;
1270 }
1271
1272 return true;
1273}
1274
1275/// Return the opcode that does not set flags when possible - otherwise
1276/// return the original opcode. The caller is responsible for doing the actual
1277/// substitution and legality checking.
1278unsigned AArch64InstrInfo::convertToNonFlagSettingOpc(const MachineInstr &MI) {
1279 // Don't convert all compare instructions, because for some the zero register
1280 // encoding becomes the sp register.
1281 bool MIDefinesZeroReg = false;
1282 if (MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1283 MI.definesRegister(AArch64::XZR, /*TRI=*/nullptr))
1284 MIDefinesZeroReg = true;
1285
1286 switch (MI.getOpcode()) {
1287 default:
1288 return MI.getOpcode();
1289 case AArch64::ADDSWrr:
1290 return AArch64::ADDWrr;
1291 case AArch64::ADDSWri:
1292 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1293 case AArch64::ADDSWrs:
1294 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1295 case AArch64::ADDSWrx:
1296 return AArch64::ADDWrx;
1297 case AArch64::ADDSXrr:
1298 return AArch64::ADDXrr;
1299 case AArch64::ADDSXri:
1300 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1301 case AArch64::ADDSXrs:
1302 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1303 case AArch64::ADDSXrx:
1304 return AArch64::ADDXrx;
1305 case AArch64::SUBSWrr:
1306 return AArch64::SUBWrr;
1307 case AArch64::SUBSWri:
1308 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1309 case AArch64::SUBSWrs:
1310 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1311 case AArch64::SUBSWrx:
1312 return AArch64::SUBWrx;
1313 case AArch64::SUBSXrr:
1314 return AArch64::SUBXrr;
1315 case AArch64::SUBSXri:
1316 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1317 case AArch64::SUBSXrs:
1318 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1319 case AArch64::SUBSXrx:
1320 return AArch64::SUBXrx;
1321 }
1322}
1323
1324enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1325
1326/// True when condition flags are accessed (either by writing or reading)
1327/// on the instruction trace starting at From and ending at To.
1328///
1329/// Note: If From and To are from different blocks it's assumed CC are accessed
1330/// on the path.
1331static bool areCFlagsAccessedBetweenInstrs(
1332 MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1333 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1334 // Early exit if To is at the beginning of the BB.
1335 if (To == To->getParent()->begin())
1336 return true;
1337
1338 // Check whether the instructions are in the same basic block
1339 // If not, assume the condition flags might get modified somewhere.
1340 if (To->getParent() != From->getParent())
1341 return true;
1342
1343 // From must be above To.
1344 assert(std::any_of(
1345 ++To.getReverse(), To->getParent()->rend(),
1346 [From](MachineInstr &MI) { return MI.getIterator() == From; }));
1347
1348 // We iterate backward starting at \p To until we hit \p From.
1349 for (const MachineInstr &Instr :
1350 instructionsWithoutDebug(++To.getReverse(), From.getReverse())) {
1351 if (((AccessToCheck & AK_Write) &&
1352 Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1353 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1354 return true;
1355 }
1356 return false;
1357}
1358
1359std::optional<unsigned>
1360AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
1361 MachineInstr *Pred,
1362 const MachineRegisterInfo *MRI) const {
1363 unsigned MaskOpcode = Mask->getOpcode();
1364 unsigned PredOpcode = Pred->getOpcode();
1365 bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
1366 bool PredIsWhileLike = isWhileOpcode(PredOpcode);
1367
1368 if (PredIsWhileLike) {
1369 // For PTEST(PG, PG), PTEST is redundant when PG is the result of a WHILEcc
1370 // instruction and the condition is "any" since WHILEcc does an implicit
1371 // PTEST(ALL, PG) check and PG is always a subset of ALL.
1372 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1373 return PredOpcode;
1374
1375 // For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
1376 // redundant since WHILE performs an implicit PTEST with an all active
1377 // mask.
1378 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1379 getElementSizeForOpcode(MaskOpcode) ==
1380 getElementSizeForOpcode(PredOpcode))
1381 return PredOpcode;
1382
1383 return {};
1384 }
1385
1386 if (PredIsPTestLike) {
1387 // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1388 // instruction that sets the flags as PTEST would and the condition is
1389 // "any" since PG is always a subset of the governing predicate of the
1390 // ptest-like instruction.
1391 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1392 return PredOpcode;
1393
1394 // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if
1395 // the element size matches and either the PTEST_LIKE instruction uses
1396 // the same all active mask or the condition is "any".
1397 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1398 getElementSizeForOpcode(MaskOpcode) ==
1399 getElementSizeForOpcode(PredOpcode)) {
1400 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1401 if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1402 return PredOpcode;
1403 }
1404
1405 // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
1406 // flags are set based on the same mask 'PG', but PTEST_LIKE must operate
1407 // on 8-bit predicates like the PTEST. Otherwise, for instructions like
1408 // compare that also support 16/32/64-bit predicates, the implicit PTEST
1409 // performed by the compare could consider fewer lanes for these element
1410 // sizes.
1411 //
1412 // For example, consider
1413 //
1414 // ptrue p0.b ; P0=1111-1111-1111-1111
1415 // index z0.s, #0, #1 ; Z0=<0,1,2,3>
1416 // index z1.s, #1, #1 ; Z1=<1,2,3,4>
1417 // cmphi p1.s, p0/z, z1.s, z0.s ; P1=0001-0001-0001-0001
1418 // ; ^ last active
1419 // ptest p0, p1.b ; P1=0001-0001-0001-0001
1420 // ; ^ last active
1421 //
1422 // where the compare generates a canonical all active 32-bit predicate
1423 // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
1424 // active flag, whereas the PTEST instruction with the same mask doesn't.
1425 // For PTEST_ANY this doesn't apply as the flags in this case would be
1426 // identical regardless of element size.
1427 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1428 uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
1429 if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
1430 PTest->getOpcode() == AArch64::PTEST_PP_ANY))
1431 return PredOpcode;
1432
1433 return {};
1434 }
1435
1436 // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
1437 // opcode so the PTEST becomes redundant.
1438 switch (PredOpcode) {
1439 case AArch64::AND_PPzPP:
1440 case AArch64::BIC_PPzPP:
1441 case AArch64::EOR_PPzPP:
1442 case AArch64::NAND_PPzPP:
1443 case AArch64::NOR_PPzPP:
1444 case AArch64::ORN_PPzPP:
1445 case AArch64::ORR_PPzPP:
1446 case AArch64::BRKA_PPzP:
1447 case AArch64::BRKPA_PPzPP:
1448 case AArch64::BRKB_PPzP:
1449 case AArch64::BRKPB_PPzPP:
1450 case AArch64::RDFFR_PPz: {
1451 // Check to see if our mask is the same. If not the resulting flag bits
1452 // may be different and we can't remove the ptest.
1453 auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1454 if (Mask != PredMask)
1455 return {};
1456 break;
1457 }
1458 case AArch64::BRKN_PPzP: {
1459 // BRKN uses an all active implicit mask to set flags unlike the other
1460 // flag-setting instructions.
1461 // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
1462 if ((MaskOpcode != AArch64::PTRUE_B) ||
1463 (Mask->getOperand(1).getImm() != 31))
1464 return {};
1465 break;
1466 }
1467 case AArch64::PTRUE_B:
1468 // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
1469 break;
1470 default:
1471 // Bail out if we don't recognize the input
1472 return {};
1473 }
1474
1475 return convertToFlagSettingOpc(PredOpcode);
1476}
1477
1478/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1479/// operation which could set the flags in an identical manner
1480bool AArch64InstrInfo::optimizePTestInstr(
1481 MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1482 const MachineRegisterInfo *MRI) const {
1483 auto *Mask = MRI->getUniqueVRegDef(MaskReg);
1484 auto *Pred = MRI->getUniqueVRegDef(PredReg);
1485 unsigned PredOpcode = Pred->getOpcode();
1486 auto NewOp = canRemovePTestInstr(PTest, Mask, Pred, MRI);
1487 if (!NewOp)
1488 return false;
1489
1490 const TargetRegisterInfo *TRI = &getRegisterInfo();
1491
1492 // If another instruction between Pred and PTest accesses flags, don't remove
1493 // the ptest or update the earlier instruction to modify them.
1494 if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
1495 return false;
1496
1497 // If we pass all the checks, it's safe to remove the PTEST and use the flags
1498 // as they are prior to PTEST. Sometimes this requires the tested PTEST
1499 // operand to be replaced with an equivalent instruction that also sets the
1500 // flags.
1501 PTest->eraseFromParent();
1502 if (*NewOp != PredOpcode) {
1503 Pred->setDesc(get(*NewOp));
1504 bool succeeded = UpdateOperandRegClass(*Pred);
1505 (void)succeeded;
1506 assert(succeeded && "Operands have incompatible register classes!");
1507 Pred->addRegisterDefined(AArch64::NZCV, TRI);
1508 }
1509
1510 // Ensure that the flags def is live.
1511 if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
1512 unsigned i = 0, e = Pred->getNumOperands();
1513 for (; i != e; ++i) {
1514 MachineOperand &MO = Pred->getOperand(i);
1515 if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
1516 MO.setIsDead(false);
1517 break;
1518 }
1519 }
1520 }
1521 return true;
1522}
1523
1524/// Try to optimize a compare instruction. A compare instruction is an
1525/// instruction which produces AArch64::NZCV. It can be a true compare
1526/// instruction
1527/// when there are no uses of its destination register.
1528///
1529/// The following steps are tried in order:
1530/// 1. Convert CmpInstr into an unconditional version.
1531/// 2. Remove CmpInstr if above there is an instruction producing a needed
1532/// condition code or an instruction which can be converted into such an
1533/// instruction.
1534/// Only comparison with zero is supported.
1535bool AArch64InstrInfo::optimizeCompareInstr(
1536 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
1537 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
1538 assert(CmpInstr.getParent());
1539 assert(MRI);
1540
1541 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1542 int DeadNZCVIdx =
1543 CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
1544 if (DeadNZCVIdx != -1) {
1545 if (CmpInstr.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1546 CmpInstr.definesRegister(AArch64::XZR, /*TRI=*/nullptr)) {
1547 CmpInstr.eraseFromParent();
1548 return true;
1549 }
1550 unsigned Opc = CmpInstr.getOpcode();
1551 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1552 if (NewOpc == Opc)
1553 return false;
1554 const MCInstrDesc &MCID = get(NewOpc);
1555 CmpInstr.setDesc(MCID);
1556 CmpInstr.removeOperand(DeadNZCVIdx);
1557 bool succeeded = UpdateOperandRegClass(CmpInstr);
1558 (void)succeeded;
1559 assert(succeeded && "Some operands reg class are incompatible!");
1560 return true;
1561 }
1562
1563 if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
1564 CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY)
1565 return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
1566
1567 if (SrcReg2 != 0)
1568 return false;
1569
1570 // CmpInstr is a Compare instruction if destination register is not used.
1571 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1572 return false;
1573
1574 if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
1575 return true;
1576 return (CmpValue == 0 || CmpValue == 1) &&
1577 removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
1578}
1579
1580/// Get opcode of S version of Instr.
1581/// If Instr is S version its opcode is returned.
1582/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
1583/// or we are not interested in it.
1584static unsigned sForm(MachineInstr &Instr) {
1585 switch (Instr.getOpcode()) {
1586 default:
1587 return AArch64::INSTRUCTION_LIST_END;
1588
1589 case AArch64::ADDSWrr:
1590 case AArch64::ADDSWri:
1591 case AArch64::ADDSXrr:
1592 case AArch64::ADDSXri:
1593 case AArch64::SUBSWrr:
1594 case AArch64::SUBSWri:
1595 case AArch64::SUBSXrr:
1596 case AArch64::SUBSXri:
1597 return Instr.getOpcode();
1598
1599 case AArch64::ADDWrr:
1600 return AArch64::ADDSWrr;
1601 case AArch64::ADDWri:
1602 return AArch64::ADDSWri;
1603 case AArch64::ADDXrr:
1604 return AArch64::ADDSXrr;
1605 case AArch64::ADDXri:
1606 return AArch64::ADDSXri;
1607 case AArch64::ADCWr:
1608 return AArch64::ADCSWr;
1609 case AArch64::ADCXr:
1610 return AArch64::ADCSXr;
1611 case AArch64::SUBWrr:
1612 return AArch64::SUBSWrr;
1613 case AArch64::SUBWri:
1614 return AArch64::SUBSWri;
1615 case AArch64::SUBXrr:
1616 return AArch64::SUBSXrr;
1617 case AArch64::SUBXri:
1618 return AArch64::SUBSXri;
1619 case AArch64::SBCWr:
1620 return AArch64::SBCSWr;
1621 case AArch64::SBCXr:
1622 return AArch64::SBCSXr;
1623 case AArch64::ANDWri:
1624 return AArch64::ANDSWri;
1625 case AArch64::ANDXri:
1626 return AArch64::ANDSXri;
1627 }
1628}
1629
1630/// Check if AArch64::NZCV should be alive in successors of MBB.
1631static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
1632 for (auto *BB : MBB->successors())
1633 if (BB->isLiveIn(AArch64::NZCV))
1634 return true;
1635 return false;
1636}
1637
1638/// \returns The condition code operand index for \p Instr if it is a branch
1639/// or select and -1 otherwise.
1640static int
1641findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
1642 switch (Instr.getOpcode()) {
1643 default:
1644 return -1;
1645
1646 case AArch64::Bcc: {
1647 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1648 assert(Idx >= 2);
1649 return Idx - 2;
1650 }
1651
1652 case AArch64::CSINVWr:
1653 case AArch64::CSINVXr:
1654 case AArch64::CSINCWr:
1655 case AArch64::CSINCXr:
1656 case AArch64::CSELWr:
1657 case AArch64::CSELXr:
1658 case AArch64::CSNEGWr:
1659 case AArch64::CSNEGXr:
1660 case AArch64::FCSELSrrr:
1661 case AArch64::FCSELDrrr: {
1662 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1663 assert(Idx >= 1);
1664 return Idx - 1;
1665 }
1666 }
1667}
1668
1669/// Find a condition code used by the instruction.
1670/// Returns AArch64CC::Invalid if either the instruction does not use condition
1671/// codes or we don't optimize CmpInstr in the presence of such instructions.
1672static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1673 int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
1674 return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
1675 Instr.getOperand(CCIdx).getImm())
1676 : AArch64CC::Invalid;
1677}
1678
1680static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1681 UsedNZCV UsedFlags;
1682 switch (CC) {
1683 default:
1684 break;
1685
1686 case AArch64CC::EQ: // Z set
1687 case AArch64CC::NE: // Z clear
1688 UsedFlags.Z = true;
1689 break;
1690
1691 case AArch64CC::HI: // Z clear and C set
1692 case AArch64CC::LS: // Z set or C clear
1693 UsedFlags.Z = true;
1694 [[fallthrough]];
1695 case AArch64CC::HS: // C set
1696 case AArch64CC::LO: // C clear
1697 UsedFlags.C = true;
1698 break;
1699
1700 case AArch64CC::MI: // N set
1701 case AArch64CC::PL: // N clear
1702 UsedFlags.N = true;
1703 break;
1704
1705 case AArch64CC::VS: // V set
1706 case AArch64CC::VC: // V clear
1707 UsedFlags.V = true;
1708 break;
1709
1710 case AArch64CC::GT: // Z clear, N and V the same
1711 case AArch64CC::LE: // Z set, N and V differ
1712 UsedFlags.Z = true;
1713 [[fallthrough]];
1714 case AArch64CC::GE: // N and V the same
1715 case AArch64CC::LT: // N and V differ
1716 UsedFlags.N = true;
1717 UsedFlags.V = true;
1718 break;
1719 }
1720 return UsedFlags;
1721}
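// For example, a following "b.hi" reports {Z, C} as used and "b.lt" reports
// {N, V}; only the flags actually read this way constrain the optimizations
// below.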
1722
1723/// \returns Conditions flags used after \p CmpInstr in its MachineBB if NZCV
1724/// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
1725/// \returns std::nullopt otherwise.
1726///
1727/// Collect instructions using those flags in \p CCUseInstrs if provided.
1728std::optional<UsedNZCV>
1729llvm::examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
1730 const TargetRegisterInfo &TRI,
1731 SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
1732 MachineBasicBlock *CmpParent = CmpInstr.getParent();
1733 if (MI.getParent() != CmpParent)
1734 return std::nullopt;
1735
1736 if (areCFlagsAliveInSuccessors(CmpParent))
1737 return std::nullopt;
1738
1739 UsedNZCV NZCVUsedAfterCmp;
1740 for (MachineInstr &Instr : instructionsWithoutDebug(
1741 std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
1742 if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
1743 AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1744 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1745 return std::nullopt;
1746 NZCVUsedAfterCmp |= getUsedNZCV(CC);
1747 if (CCUseInstrs)
1748 CCUseInstrs->push_back(&Instr);
1749 }
1750 if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
1751 break;
1752 }
1753 return NZCVUsedAfterCmp;
1754}
1755
1756static bool isADDSRegImm(unsigned Opcode) {
1757 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1758}
1759
1760static bool isSUBSRegImm(unsigned Opcode) {
1761 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1762}
1763
1764/// Check if CmpInstr can be substituted by MI.
1765///
1766/// CmpInstr can be substituted:
1767/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1768/// - and, MI and CmpInstr are from the same MachineBB
1769/// - and, condition flags are not alive in successors of the CmpInstr parent
1770/// - and, if MI opcode is the S form there must be no defs of flags between
1771/// MI and CmpInstr
1772/// or if MI opcode is not the S form there must be neither defs of flags
1773/// nor uses of flags between MI and CmpInstr.
1774/// - and, if C/V flags are not used after CmpInstr
1775/// or if N flag is used but MI produces poison value if signed overflow
1776/// occurs.
1777static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
1778 const TargetRegisterInfo &TRI) {
1779 // NOTE this assertion guarantees that MI.getOpcode() is add or subtraction
1780 // that may or may not set flags.
1781 assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
1782
1783 const unsigned CmpOpcode = CmpInstr.getOpcode();
1784 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1785 return false;
1786
1787 assert((CmpInstr.getOperand(2).isImm() &&
1788 CmpInstr.getOperand(2).getImm() == 0) &&
1789 "Caller guarantees that CmpInstr compares with constant 0");
1790
1791 std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
1792 if (!NZVCUsed || NZVCUsed->C)
1793 return false;
1794
1795 // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
1796 // '%vreg = add ...' or '%vreg = sub ...'.
1797 // Condition flag V is used to indicate signed overflow.
1798 // 1) MI and CmpInstr set N and V to the same value.
1799 // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
1800 // signed overflow occurs, so CmpInstr could still be simplified away.
1801 if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
1802 return false;
1803
1804 AccessKind AccessToCheck = AK_Write;
1805 if (sForm(MI) != MI.getOpcode())
1806 AccessToCheck = AK_All;
1807 return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
1808}
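// Sketch of the rewrite this check enables (virtual registers hypothetical):
//   %w = SUBWri %x, 1, 0
//   SUBSWri $wzr, %w, 0, 0       ; compare %w against zero
//   Bcc ne, ...
// Since the b.ne only reads Z, the compare can be removed once %w's SUBWri is
// converted to its flag-setting SUBSWri form.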
1809
1810/// Substitute an instruction comparing to zero with another instruction
1811/// which produces needed condition flags.
1812///
1813/// Return true on success.
1814bool AArch64InstrInfo::substituteCmpToZero(
1815 MachineInstr &CmpInstr, unsigned SrcReg,
1816 const MachineRegisterInfo &MRI) const {
1817 // Get the unique definition of SrcReg.
1818 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1819 if (!MI)
1820 return false;
1821
1822 const TargetRegisterInfo &TRI = getRegisterInfo();
1823
1824 unsigned NewOpc = sForm(*MI);
1825 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1826 return false;
1827
1828 if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
1829 return false;
1830
1831 // Update the instruction to set NZCV.
1832 MI->setDesc(get(NewOpc));
1833 CmpInstr.eraseFromParent();
1834 bool succeeded = UpdateOperandRegClass(*MI);
1835 (void)succeeded;
1836 assert(succeeded && "Some operands reg class are incompatible!");
1837 MI->addRegisterDefined(AArch64::NZCV, &TRI);
1838 return true;
1839}
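// Example use of substituteCmpToZero() above (illustrative sketch; `TII`,
// `MRI` and `Cmp` - an 'ADDS/SUBS %vreg, 0' - are assumed to come from the
// surrounding peephole code):
//   Register SrcReg = Cmp.getOperand(1).getReg();
//   if (TII.substituteCmpToZero(Cmp, SrcReg, MRI)) {
//     // Cmp has been erased; the unique definition of SrcReg was rewritten to
//     // its flag-setting (S) form and now defines NZCV.
//   }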
1840
1841/// \returns True if \p CmpInstr can be removed.
1842///
1843/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
1844/// codes used in \p CCUseInstrs must be inverted.
1845 static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
1846 int CmpValue, const TargetRegisterInfo &TRI,
1847 SmallVectorImpl<MachineInstr *> &CCUseInstrs,
1848 bool &IsInvertCC) {
1849 assert((CmpValue == 0 || CmpValue == 1) &&
1850 "Only comparisons to 0 or 1 considered for removal!");
1851
1852 // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
1853 unsigned MIOpc = MI.getOpcode();
1854 if (MIOpc == AArch64::CSINCWr) {
1855 if (MI.getOperand(1).getReg() != AArch64::WZR ||
1856 MI.getOperand(2).getReg() != AArch64::WZR)
1857 return false;
1858 } else if (MIOpc == AArch64::CSINCXr) {
1859 if (MI.getOperand(1).getReg() != AArch64::XZR ||
1860 MI.getOperand(2).getReg() != AArch64::XZR)
1861 return false;
1862 } else {
1863 return false;
1864 }
1865 AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
1866 if (MICC == AArch64CC::Invalid)
1867 return false;
1868
1869 // NZCV needs to be defined
1870 if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) != -1)
1871 return false;
1872
1873 // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
1874 const unsigned CmpOpcode = CmpInstr.getOpcode();
1875 bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
1876 if (CmpValue && !IsSubsRegImm)
1877 return false;
1878 if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
1879 return false;
1880
1881 // MI conditions allowed: eq, ne, mi, pl
1882 UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
1883 if (MIUsedNZCV.C || MIUsedNZCV.V)
1884 return false;
1885
1886 std::optional<UsedNZCV> NZCVUsedAfterCmp =
1887 examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
1888 // Condition flags are not used in the successors of CmpInstr's basic block,
1889 // and only the Z or N flags may be used after CmpInstr within its block.
1890 if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
1891 return false;
1892 // Z or N flag used after CmpInstr must correspond to the flag used in MI
1893 if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
1894 (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
1895 return false;
1896 // If CmpInstr is a comparison to zero, MI conditions are limited to eq, ne
1897 if (MIUsedNZCV.N && !CmpValue)
1898 return false;
1899
1900 // There must be no defs of flags between MI and CmpInstr
1901 if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
1902 return false;
1903
1904 // Condition code is inverted in the following cases:
1905 // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1906 // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
1907 IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
1908 (!CmpValue && MICC == AArch64CC::NE);
1909 return true;
1910}
1911
1912/// Remove comparison in csinc-cmp sequence
1913///
1914/// Examples:
1915/// 1. \code
1916/// csinc w9, wzr, wzr, ne
1917/// cmp w9, #0
1918/// b.eq
1919/// \endcode
1920/// to
1921/// \code
1922/// csinc w9, wzr, wzr, ne
1923/// b.ne
1924/// \endcode
1925///
1926/// 2. \code
1927/// csinc x2, xzr, xzr, mi
1928/// cmp x2, #1
1929/// b.pl
1930/// \endcode
1931/// to
1932/// \code
1933/// csinc x2, xzr, xzr, mi
1934/// b.pl
1935/// \endcode
1936///
1937/// \param CmpInstr comparison instruction
1938/// \return True when comparison removed
1939bool AArch64InstrInfo::removeCmpToZeroOrOne(
1940 MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
1941 const MachineRegisterInfo &MRI) const {
1942 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1943 if (!MI)
1944 return false;
1945 const TargetRegisterInfo &TRI = getRegisterInfo();
1946 SmallVector<MachineInstr *, 4> CCUseInstrs;
1947 bool IsInvertCC = false;
1948 if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
1949 IsInvertCC))
1950 return false;
1951 // Make transformation
1952 CmpInstr.eraseFromParent();
1953 if (IsInvertCC) {
1954 // Invert condition codes in CmpInstr CC users
1955 for (MachineInstr *CCUseInstr : CCUseInstrs) {
1956 int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
1957 assert(Idx >= 0 && "Unexpected instruction using CC.");
1958 MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
1959 AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
1960 static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
1961 CCOperand.setImm(CCUse);
1962 }
1963 }
1964 return true;
1965}
1966
1967 bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1968 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
1969 MI.getOpcode() != AArch64::CATCHRET)
1970 return false;
1971
1972 MachineBasicBlock &MBB = *MI.getParent();
1973 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
1974 auto TRI = Subtarget.getRegisterInfo();
1975 DebugLoc DL = MI.getDebugLoc();
1976
1977 if (MI.getOpcode() == AArch64::CATCHRET) {
1978 // Skip to the first instruction before the epilog.
1979 const TargetInstrInfo *TII =
1980 MBB.getParent()->getSubtarget().getInstrInfo();
1981 MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
1982 MachineBasicBlock::iterator MBBI(MI);
1983 MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
1984 while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
1985 FirstEpilogSEH != MBB.begin())
1986 FirstEpilogSEH = std::prev(FirstEpilogSEH);
1987 if (FirstEpilogSEH != MBB.begin())
1988 FirstEpilogSEH = std::next(FirstEpilogSEH);
1989 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
1990 .addReg(AArch64::X0, RegState::Define)
1991 .addMBB(TargetMBB);
1992 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
1993 .addReg(AArch64::X0, RegState::Define)
1994 .addReg(AArch64::X0)
1995 .addMBB(TargetMBB)
1996 .addImm(0);
1997 return true;
1998 }
1999
2000 Register Reg = MI.getOperand(0).getReg();
2001 const Module &M = *MBB.getParent()->getFunction().getParent();
2002 if (M.getStackProtectorGuard() == "sysreg") {
2003 const AArch64SysReg::SysReg *SrcReg =
2004 AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
2005 if (!SrcReg)
2006 report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
2007
2008 // mrs xN, sysreg
2009 BuildMI(MBB, MI, DL, get(AArch64::MRS))
2010 .addDef(Reg)
2011 .addImm(SrcReg->Encoding);
2012 int Offset = M.getStackProtectorGuardOffset();
2013 if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
2014 // ldr xN, [xN, #offset]
2015 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2016 .addDef(Reg)
2017 .addUse(Reg, RegState::Kill)
2018 .addImm(Offset / 8);
2019 } else if (Offset >= -256 && Offset <= 255) {
2020 // ldur xN, [xN, #offset]
2021 BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
2022 .addDef(Reg)
2023 .addUse(Reg, RegState::Kill)
2024 .addImm(Offset);
2025 } else if (Offset >= -4095 && Offset <= 4095) {
2026 if (Offset > 0) {
2027 // add xN, xN, #offset
2028 BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
2029 .addDef(Reg)
2030 .addUse(Reg, RegState::Kill)
2031 .addImm(Offset)
2032 .addImm(0);
2033 } else {
2034 // sub xN, xN, #offset
2035 BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
2036 .addDef(Reg)
2037 .addUse(Reg, RegState::Kill)
2038 .addImm(-Offset)
2039 .addImm(0);
2040 }
2041 // ldr xN, [xN]
2042 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2043 .addDef(Reg)
2044 .addUse(Reg, RegState::Kill)
2045 .addImm(0);
2046 } else {
2047 // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
2048 // than 32760, cannot be encoded with the addressing forms above.
2049 // It might be nice to use AArch64::MOVi32imm here, which would get
2050 // expanded in PreSched2 after PostRA, but our lone scratch Reg already
2051 // contains the MRS result. findScratchNonCalleeSaveRegister() in
2052 // AArch64FrameLowering might help us find such a scratch register
2053 // though. If we failed to find a scratch register, we could emit a
2054 // stream of add instructions to build up the immediate. Or, we could try
2055 // to insert an AArch64::MOVi32imm before register allocation so that we
2056 // didn't need to scavenge for a scratch register.
2057 report_fatal_error("Unable to encode Stack Protector Guard Offset");
2058 }
2059 MBB.erase(MI);
2060 return true;
2061 }
2062
2063 const GlobalValue *GV =
2064 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
2065 const TargetMachine &TM = MBB.getParent()->getTarget();
2066 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
2067 const unsigned char MO_NC = AArch64II::MO_NC;
2068
2069 if ((OpFlags & AArch64II::MO_GOT) != 0) {
2070 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
2071 .addGlobalAddress(GV, 0, OpFlags);
2072 if (Subtarget.isTargetILP32()) {
2073 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2074 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2075 .addDef(Reg32, RegState::Dead)
2076 .addUse(Reg, RegState::Kill)
2077 .addImm(0)
2078 .addMemOperand(*MI.memoperands_begin())
2079 .addDef(Reg, RegState::Implicit);
2080 } else {
2081 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2082 .addReg(Reg, RegState::Kill)
2083 .addImm(0)
2084 .addMemOperand(*MI.memoperands_begin());
2085 }
2086 } else if (TM.getCodeModel() == CodeModel::Large) {
2087 assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
2088 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
2089 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
2090 .addImm(0);
2091 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2092 .addReg(Reg, RegState::Kill)
2093 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
2094 .addImm(16);
2095 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2096 .addReg(Reg, RegState::Kill)
2097 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
2098 .addImm(32);
2099 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2100 .addReg(Reg, RegState::Kill)
2101 .addGlobalAddress(GV, 0, AArch64II::MO_G3)
2102 .addImm(48);
2103 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2104 .addReg(Reg, RegState::Kill)
2105 .addImm(0)
2106 .addMemOperand(*MI.memoperands_begin());
2107 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2108 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
2109 .addGlobalAddress(GV, 0, OpFlags);
2110 } else {
2111 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
2112 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
2113 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
2114 if (Subtarget.isTargetILP32()) {
2115 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2116 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2117 .addDef(Reg32, RegState::Dead)
2118 .addUse(Reg, RegState::Kill)
2119 .addGlobalAddress(GV, 0, LoFlags)
2120 .addMemOperand(*MI.memoperands_begin())
2121 .addDef(Reg, RegState::Implicit);
2122 } else {
2123 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2124 .addReg(Reg, RegState::Kill)
2125 .addGlobalAddress(GV, 0, LoFlags)
2126 .addMemOperand(*MI.memoperands_begin());
2127 }
2128 }
2129
2130 MBB.erase(MI);
2131
2132 return true;
2133}
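// How the "sysreg" stack protector guard offset is materialized above, by
// example (illustrative offsets):
//   offset 1032  -> ldr xN, [xN, #1032]               (multiple of 8, 0..32760)
//   offset -40   -> ldur xN, [xN, #-40]               (-256..255)
//   offset 4001  -> add xN, xN, #4001; ldr xN, [xN]   (within +/-4095)
//   offset 40000 -> report_fatal_error, the offset cannot be encoded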
2134
2135// Return true if this instruction simply sets its single destination register
2136// to zero. This is equivalent to a register rename of the zero-register.
2137 bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
2138 switch (MI.getOpcode()) {
2139 default:
2140 break;
2141 case AArch64::MOVZWi:
2142 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2143 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
2144 assert(MI.getDesc().getNumOperands() == 3 &&
2145 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2146 return true;
2147 }
2148 break;
2149 case AArch64::ANDWri: // and Rd, Rzr, #imm
2150 return MI.getOperand(1).getReg() == AArch64::WZR;
2151 case AArch64::ANDXri:
2152 return MI.getOperand(1).getReg() == AArch64::XZR;
2153 case TargetOpcode::COPY:
2154 return MI.getOperand(1).getReg() == AArch64::WZR;
2155 }
2156 return false;
2157}
2158
2159// Return true if this instruction simply renames a general register without
2160// modifying bits.
2161 bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
2162 switch (MI.getOpcode()) {
2163 default:
2164 break;
2165 case TargetOpcode::COPY: {
2166 // GPR32 copies will be lowered to ORRXrs
2167 Register DstReg = MI.getOperand(0).getReg();
2168 return (AArch64::GPR32RegClass.contains(DstReg) ||
2169 AArch64::GPR64RegClass.contains(DstReg));
2170 }
2171 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2172 if (MI.getOperand(1).getReg() == AArch64::XZR) {
2173 assert(MI.getDesc().getNumOperands() == 4 &&
2174 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2175 return true;
2176 }
2177 break;
2178 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2179 if (MI.getOperand(2).getImm() == 0) {
2180 assert(MI.getDesc().getNumOperands() == 4 &&
2181 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2182 return true;
2183 }
2184 break;
2185 }
2186 return false;
2187}
2188
2189 // Return true if this instruction simply renames a floating-point register
2190 // without modifying bits.
2191 bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
2192 switch (MI.getOpcode()) {
2193 default:
2194 break;
2195 case TargetOpcode::COPY: {
2196 Register DstReg = MI.getOperand(0).getReg();
2197 return AArch64::FPR128RegClass.contains(DstReg);
2198 }
2199 case AArch64::ORRv16i8:
2200 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
2201 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
2202 "invalid ORRv16i8 operands");
2203 return true;
2204 }
2205 break;
2206 }
2207 return false;
2208}
2209
2210 Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
2211 int &FrameIndex) const {
2212 switch (MI.getOpcode()) {
2213 default:
2214 break;
2215 case AArch64::LDRWui:
2216 case AArch64::LDRXui:
2217 case AArch64::LDRBui:
2218 case AArch64::LDRHui:
2219 case AArch64::LDRSui:
2220 case AArch64::LDRDui:
2221 case AArch64::LDRQui:
2222 case AArch64::LDR_PXI:
2223 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2224 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2225 FrameIndex = MI.getOperand(1).getIndex();
2226 return MI.getOperand(0).getReg();
2227 }
2228 break;
2229 }
2230
2231 return 0;
2232}
2233
2234 Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
2235 int &FrameIndex) const {
2236 switch (MI.getOpcode()) {
2237 default:
2238 break;
2239 case AArch64::STRWui:
2240 case AArch64::STRXui:
2241 case AArch64::STRBui:
2242 case AArch64::STRHui:
2243 case AArch64::STRSui:
2244 case AArch64::STRDui:
2245 case AArch64::STRQui:
2246 case AArch64::STR_PXI:
2247 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2248 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2249 FrameIndex = MI.getOperand(1).getIndex();
2250 return MI.getOperand(0).getReg();
2251 }
2252 break;
2253 }
2254 return 0;
2255}
2256
2257/// Check all MachineMemOperands for a hint to suppress pairing.
2258 bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
2259 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2260 return MMO->getFlags() & MOSuppressPair;
2261 });
2262}
2263
2264/// Set a flag on the first MachineMemOperand to suppress pairing.
2265 void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
2266 if (MI.memoperands_empty())
2267 return;
2268 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
2269}
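// Example use of the pairing-suppression hint above (illustrative sketch;
// `StoreMI` is an assumed store instruction with at least one memory operand):
//   AArch64InstrInfo::suppressLdStPair(StoreMI);
//   assert(AArch64InstrInfo::isLdStPairSuppressed(StoreMI) &&
//          "pairing hint should now be set");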
2270
2271/// Check all MachineMemOperands for a hint that the load/store is strided.
2272 bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
2273 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2274 return MMO->getFlags() & MOStridedAccess;
2275 });
2276}
2277
2278 bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
2279 switch (Opc) {
2280 default:
2281 return false;
2282 case AArch64::STURSi:
2283 case AArch64::STRSpre:
2284 case AArch64::STURDi:
2285 case AArch64::STRDpre:
2286 case AArch64::STURQi:
2287 case AArch64::STRQpre:
2288 case AArch64::STURBBi:
2289 case AArch64::STURHHi:
2290 case AArch64::STURWi:
2291 case AArch64::STRWpre:
2292 case AArch64::STURXi:
2293 case AArch64::STRXpre:
2294 case AArch64::LDURSi:
2295 case AArch64::LDRSpre:
2296 case AArch64::LDURDi:
2297 case AArch64::LDRDpre:
2298 case AArch64::LDURQi:
2299 case AArch64::LDRQpre:
2300 case AArch64::LDURWi:
2301 case AArch64::LDRWpre:
2302 case AArch64::LDURXi:
2303 case AArch64::LDRXpre:
2304 case AArch64::LDRSWpre:
2305 case AArch64::LDURSWi:
2306 case AArch64::LDURHHi:
2307 case AArch64::LDURBBi:
2308 case AArch64::LDURSBWi:
2309 case AArch64::LDURSHWi:
2310 return true;
2311 }
2312}
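// For example (per the switch above):
//   AArch64InstrInfo::hasUnscaledLdStOffset(AArch64::LDURXi) == true
//   AArch64InstrInfo::hasUnscaledLdStOffset(AArch64::LDRXui) == false  // scaled form, default case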
2313
2314std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
2315 switch (Opc) {
2316 default: return {};
2317 case AArch64::PRFMui: return AArch64::PRFUMi;
2318 case AArch64::LDRXui: return AArch64::LDURXi;
2319 case AArch64::LDRWui: return AArch64::LDURWi;
2320 case AArch64::LDRBui: return AArch64::LDURBi;
2321 case AArch64::LDRHui: return AArch64::LDURHi;
2322 case AArch64::LDRSui: return AArch64::LDURSi;
2323 case AArch64::LDRDui: return AArch64::LDURDi;
2324 case AArch64::LDRQui: return AArch64::LDURQi;
2325 case AArch64::LDRBBui: return AArch64::LDURBBi;
2326 case AArch64::LDRHHui: return AArch64::LDURHHi;
2327 case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2328 case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2329 case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2330 case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2331 case AArch64::LDRSWui: return AArch64::LDURSWi;
2332 case AArch64::STRXui: return AArch64::STURXi;
2333 case AArch64::STRWui: return AArch64::STURWi;
2334 case AArch64::STRBui: return AArch64::STURBi;
2335 case AArch64::STRHui: return AArch64::STURHi;
2336 case AArch64::STRSui: return AArch64::STURSi;
2337 case AArch64::STRDui: return AArch64::STURDi;
2338 case AArch64::STRQui: return AArch64::STURQi;
2339 case AArch64::STRBBui: return AArch64::STURBBi;
2340 case AArch64::STRHHui: return AArch64::STURHHi;
2341 }
2342}
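// For example (mappings taken from the switch above):
//   AArch64InstrInfo::getUnscaledLdSt(AArch64::LDRXui) == AArch64::LDURXi
//   AArch64InstrInfo::getUnscaledLdSt(AArch64::ADDXri) == std::nullopt  // not a load/store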
2343
2344 unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
2345 switch (Opc) {
2346 default:
2347 return 2;
2348 case AArch64::LDPXi:
2349 case AArch64::LDPDi:
2350 case AArch64::STPXi:
2351 case AArch64::STPDi:
2352 case AArch64::LDNPXi:
2353 case AArch64::LDNPDi:
2354 case AArch64::STNPXi:
2355 case AArch64::STNPDi:
2356 case AArch64::LDPQi:
2357 case AArch64::STPQi:
2358 case AArch64::LDNPQi:
2359 case AArch64::STNPQi:
2360 case AArch64::LDPWi:
2361 case AArch64::LDPSi:
2362 case AArch64::STPWi:
2363 case AArch64::STPSi:
2364 case AArch64::LDNPWi:
2365 case AArch64::LDNPSi:
2366 case AArch64::STNPWi:
2367 case AArch64::STNPSi:
2368 case AArch64::LDG:
2369 case AArch64::STGPi:
2370
2371 case AArch64::LD1B_IMM:
2372 case AArch64::LD1B_H_IMM:
2373 case AArch64::LD1B_S_IMM:
2374 case AArch64::LD1B_D_IMM:
2375 case AArch64::LD1SB_H_IMM:
2376 case AArch64::LD1SB_S_IMM:
2377 case AArch64::LD1SB_D_IMM:
2378 case AArch64::LD1H_IMM:
2379 case AArch64::LD1H_S_IMM:
2380 case AArch64::LD1H_D_IMM:
2381 case AArch64::LD1SH_S_IMM:
2382 case AArch64::LD1SH_D_IMM:
2383 case AArch64::LD1W_IMM:
2384 case AArch64::LD1W_D_IMM:
2385 case AArch64::LD1SW_D_IMM:
2386 case AArch64::LD1D_IMM:
2387
2388 case AArch64::LD2B_IMM:
2389 case AArch64::LD2H_IMM:
2390 case AArch64::LD2W_IMM:
2391 case AArch64::LD2D_IMM:
2392 case AArch64::LD3B_IMM:
2393 case AArch64::LD3H_IMM:
2394 case AArch64::LD3W_IMM:
2395 case AArch64::LD3D_IMM:
2396 case AArch64::LD4B_IMM:
2397 case AArch64::LD4H_IMM:
2398 case AArch64::LD4W_IMM:
2399 case AArch64::LD4D_IMM:
2400
2401 case AArch64::ST1B_IMM:
2402 case AArch64::ST1B_H_IMM:
2403 case AArch64::ST1B_S_IMM:
2404 case AArch64::ST1B_D_IMM:
2405 case AArch64::ST1H_IMM:
2406 case AArch64::ST1H_S_IMM:
2407 case AArch64::ST1H_D_IMM:
2408 case AArch64::ST1W_IMM:
2409 case AArch64::ST1W_D_IMM:
2410 case AArch64::ST1D_IMM:
2411
2412 case AArch64::ST2B_IMM:
2413 case AArch64::ST2H_IMM:
2414 case AArch64::ST2W_IMM:
2415 case AArch64::ST2D_IMM:
2416 case AArch64::ST3B_IMM:
2417 case AArch64::ST3H_IMM:
2418 case AArch64::ST3W_IMM:
2419 case AArch64::ST3D_IMM:
2420 case AArch64::ST4B_IMM:
2421 case AArch64::ST4H_IMM:
2422 case AArch64::ST4W_IMM:
2423 case AArch64::ST4D_IMM:
2424
2425 case AArch64::LD1RB_IMM:
2426 case AArch64::LD1RB_H_IMM:
2427 case AArch64::LD1RB_S_IMM:
2428 case AArch64::LD1RB_D_IMM:
2429 case AArch64::LD1RSB_H_IMM:
2430 case AArch64::LD1RSB_S_IMM:
2431 case AArch64::LD1RSB_D_IMM:
2432 case AArch64::LD1RH_IMM:
2433 case AArch64::LD1RH_S_IMM:
2434 case AArch64::LD1RH_D_IMM:
2435 case AArch64::LD1RSH_S_IMM:
2436 case AArch64::LD1RSH_D_IMM:
2437 case AArch64::LD1RW_IMM:
2438 case AArch64::LD1RW_D_IMM:
2439 case AArch64::LD1RSW_IMM:
2440 case AArch64::LD1RD_IMM:
2441
2442 case AArch64::LDNT1B_ZRI:
2443 case AArch64::LDNT1H_ZRI:
2444 case AArch64::LDNT1W_ZRI:
2445 case AArch64::LDNT1D_ZRI:
2446 case AArch64::STNT1B_ZRI:
2447 case AArch64::STNT1H_ZRI:
2448 case AArch64::STNT1W_ZRI:
2449 case AArch64::STNT1D_ZRI:
2450
2451 case AArch64::LDNF1B_IMM:
2452 case AArch64::LDNF1B_H_IMM:
2453 case AArch64::LDNF1B_S_IMM:
2454 case AArch64::LDNF1B_D_IMM:
2455 case AArch64::LDNF1SB_H_IMM:
2456 case AArch64::LDNF1SB_S_IMM:
2457 case AArch64::LDNF1SB_D_IMM:
2458 case AArch64::LDNF1H_IMM:
2459 case AArch64::LDNF1H_S_IMM:
2460 case AArch64::LDNF1H_D_IMM:
2461 case AArch64::LDNF1SH_S_IMM:
2462 case AArch64::LDNF1SH_D_IMM:
2463 case AArch64::LDNF1W_IMM:
2464 case AArch64::LDNF1W_D_IMM:
2465 case AArch64::LDNF1SW_D_IMM:
2466 case AArch64::LDNF1D_IMM:
2467 return 3;
2468 case AArch64::ADDG:
2469 case AArch64::STGi:
2470 case AArch64::LDR_PXI:
2471 case AArch64::STR_PXI:
2472 return 2;
2473 }
2474}
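// For example (per the switch above):
//   AArch64InstrInfo::getLoadStoreImmIdx(AArch64::LDPXi)  == 3   // ldp xt1, xt2, [xn, #imm]
//   AArch64InstrInfo::getLoadStoreImmIdx(AArch64::LDRXui) == 2   // default case: ldr xt, [xn, #imm]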
2475
2476 bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
2477 switch (MI.getOpcode()) {
2478 default:
2479 return false;
2480 // Scaled instructions.
2481 case AArch64::STRSui:
2482 case AArch64::STRDui:
2483 case AArch64::STRQui:
2484 case AArch64::STRXui:
2485 case AArch64::STRWui:
2486 case AArch64::LDRSui:
2487 case AArch64::LDRDui:
2488 case AArch64::LDRQui:
2489 case AArch64::LDRXui:
2490 case AArch64::LDRWui:
2491 case AArch64::LDRSWui:
2492 // Unscaled instructions.
2493 case AArch64::STURSi:
2494 case AArch64::STRSpre:
2495 case AArch64::STURDi:
2496 case AArch64::STRDpre:
2497 case AArch64::STURQi:
2498 case AArch64::STRQpre:
2499 case AArch64::STURWi:
2500 case AArch64::STRWpre:
2501 case AArch64::STURXi:
2502 case AArch64::STRXpre:
2503 case AArch64::LDURSi:
2504 case AArch64::LDRSpre:
2505 case AArch64::LDURDi:
2506 case AArch64::LDRDpre:
2507 case AArch64::LDURQi:
2508 case AArch64::LDRQpre:
2509 case AArch64::LDURWi:
2510 case AArch64::LDRWpre:
2511 case AArch64::LDURXi:
2512 case AArch64::LDRXpre:
2513 case AArch64::LDURSWi:
2514 case AArch64::LDRSWpre:
2515 return true;
2516 }
2517}
2518
2519 bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
2520 switch (MI.getOpcode()) {
2521 default:
2522 assert((!MI.isCall() || !MI.isReturn()) &&
2523 "Unexpected instruction - was a new tail call opcode introduced?");
2524 return false;
2525 case AArch64::TCRETURNdi:
2526 case AArch64::TCRETURNri:
2527 case AArch64::TCRETURNrix16x17:
2528 case AArch64::TCRETURNrix17:
2529 case AArch64::TCRETURNrinotx16:
2530 case AArch64::TCRETURNriALL:
2531 case AArch64::AUTH_TCRETURN:
2532 case AArch64::AUTH_TCRETURN_BTI:
2533 return true;
2534 }
2535}
2536
2537 unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
2538 switch (Opc) {
2539 default:
2540 llvm_unreachable("Opcode has no flag setting equivalent!");
2541 // 32-bit cases:
2542 case AArch64::ADDWri:
2543 return AArch64::ADDSWri;
2544 case AArch64::ADDWrr:
2545 return AArch64::ADDSWrr;
2546 case AArch64::ADDWrs:
2547 return AArch64::ADDSWrs;
2548 case AArch64::ADDWrx:
2549 return AArch64::ADDSWrx;
2550 case AArch64::ANDWri:
2551 return AArch64::ANDSWri;
2552 case AArch64::ANDWrr:
2553 return AArch64::ANDSWrr;
2554 case AArch64::ANDWrs:
2555 return AArch64::ANDSWrs;
2556 case AArch64::BICWrr:
2557 return AArch64::BICSWrr;
2558 case AArch64::BICWrs:
2559 return AArch64::BICSWrs;
2560 case AArch64::SUBWri:
2561 return AArch64::SUBSWri;
2562 case AArch64::SUBWrr:
2563 return AArch64::SUBSWrr;
2564 case AArch64::SUBWrs:
2565 return AArch64::SUBSWrs;
2566 case AArch64::SUBWrx:
2567 return AArch64::SUBSWrx;
2568 // 64-bit cases:
2569 case AArch64::ADDXri:
2570 return AArch64::ADDSXri;
2571 case AArch64::ADDXrr:
2572 return AArch64::ADDSXrr;
2573 case AArch64::ADDXrs:
2574 return AArch64::ADDSXrs;
2575 case AArch64::ADDXrx:
2576 return AArch64::ADDSXrx;
2577 case AArch64::ANDXri:
2578 return AArch64::ANDSXri;
2579 case AArch64::ANDXrr:
2580 return AArch64::ANDSXrr;
2581 case AArch64::ANDXrs:
2582 return AArch64::ANDSXrs;
2583 case AArch64::BICXrr:
2584 return AArch64::BICSXrr;
2585 case AArch64::BICXrs:
2586 return AArch64::BICSXrs;
2587 case AArch64::SUBXri:
2588 return AArch64::SUBSXri;
2589 case AArch64::SUBXrr:
2590 return AArch64::SUBSXrr;
2591 case AArch64::SUBXrs:
2592 return AArch64::SUBSXrs;
2593 case AArch64::SUBXrx:
2594 return AArch64::SUBSXrx;
2595 // SVE instructions:
2596 case AArch64::AND_PPzPP:
2597 return AArch64::ANDS_PPzPP;
2598 case AArch64::BIC_PPzPP:
2599 return AArch64::BICS_PPzPP;
2600 case AArch64::EOR_PPzPP:
2601 return AArch64::EORS_PPzPP;
2602 case AArch64::NAND_PPzPP:
2603 return AArch64::NANDS_PPzPP;
2604 case AArch64::NOR_PPzPP:
2605 return AArch64::NORS_PPzPP;
2606 case AArch64::ORN_PPzPP:
2607 return AArch64::ORNS_PPzPP;
2608 case AArch64::ORR_PPzPP:
2609 return AArch64::ORRS_PPzPP;
2610 case AArch64::BRKA_PPzP:
2611 return AArch64::BRKAS_PPzP;
2612 case AArch64::BRKPA_PPzPP:
2613 return AArch64::BRKPAS_PPzPP;
2614 case AArch64::BRKB_PPzP:
2615 return AArch64::BRKBS_PPzP;
2616 case AArch64::BRKPB_PPzPP:
2617 return AArch64::BRKPBS_PPzPP;
2618 case AArch64::BRKN_PPzP:
2619 return AArch64::BRKNS_PPzP;
2620 case AArch64::RDFFR_PPz:
2621 return AArch64::RDFFRS_PPz;
2622 case AArch64::PTRUE_B:
2623 return AArch64::PTRUES_B;
2624 }
2625}
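// For example (mappings taken from the switch above):
//   AArch64InstrInfo::convertToFlagSettingOpc(AArch64::ADDWri)    == AArch64::ADDSWri
//   AArch64InstrInfo::convertToFlagSettingOpc(AArch64::AND_PPzPP) == AArch64::ANDS_PPzPP
// Passing an opcode without a flag-setting form hits the llvm_unreachable above.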
2626
2627// Is this a candidate for ld/st merging or pairing? For example, we don't
2628// touch volatiles or load/stores that have a hint to avoid pair formation.
2629 bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
2630
2631 bool IsPreLdSt = isPreLdSt(MI);
2632
2633 // If this is a volatile load/store, don't mess with it.
2634 if (MI.hasOrderedMemoryRef())
2635 return false;
2636
2637 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
2638 // For Pre-inc LD/ST, the operand is shifted by one.
2639 assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
2640 MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
2641 "Expected a reg or frame index operand.");
2642
2643 // For Pre-indexed addressing quadword instructions, the third operand is the
2644 // immediate value.
2645 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
2646
2647 if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
2648 return false;
2649
2650 // Can't merge/pair if the instruction modifies the base register.
2651 // e.g., ldr x0, [x0]
2652 // This case will never occur with an FI base.
2653 // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
2654 // STR<S,D,Q,W,X>pre, it can be merged.
2655 // For example:
2656 // ldr q0, [x11, #32]!
2657 // ldr q1, [x11, #16]
2658 // to
2659 // ldp q0, q1, [x11, #32]!
2660 if (MI.getOperand(1).isReg() && !IsPreLdSt) {
2661 Register BaseReg = MI.getOperand(1).getReg();
2662 const TargetRegisterInfo *TRI = &getRegisterInfo();
2663 if (MI.modifiesRegister(BaseReg, TRI))
2664 return false;
2665 }
2666
2667 // Check if this load/store has a hint to avoid pair formation.
2668 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
2669 if (isLdStPairSuppressed(MI))
2670 return false;
2671
2672 // Do not pair any callee-save store/reload instructions in the
2673 // prologue/epilogue if the CFI information encoded the operations as separate
2674 // instructions, as that will cause the size of the actual prologue to differ
2675 // from the prologue size recorded in the Windows CFI.
2676 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
2677 bool NeedsWinCFI = MAI->usesWindowsCFI() &&
2678 MI.getMF()->getFunction().needsUnwindTableEntry();
2679 if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
2680 MI.getFlag(MachineInstr::FrameDestroy)))
2681 return false;
2682
2683 // On some CPUs quad load/store pairs are slower than two single load/stores.
2684 if (Subtarget.isPaired128Slow()) {
2685 switch (MI.getOpcode()) {
2686 default:
2687 break;
2688 case AArch64::LDURQi:
2689 case AArch64::STURQi:
2690 case AArch64::LDRQui:
2691 case AArch64::STRQui:
2692 return false;
2693 }
2694 }
2695
2696 return true;
2697}
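// Example use of isCandidateToMergeOrPair() above (illustrative sketch;
// `First` and `Second` are assumed adjacent loads found by a caller such as
// the load/store optimizer):
//   if (TII.isCandidateToMergeOrPair(First) &&
//       TII.isCandidateToMergeOrPair(Second)) {
//     // Both may be considered for LDP/STP formation; the caller still has to
//     // prove that the two addresses are adjacent and compatible.
//   }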
2698
2699 bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
2700 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
2701 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
2702 const TargetRegisterInfo *TRI) const {
2703 if (!LdSt.mayLoadOrStore())
2704 return false;
2705
2706 const MachineOperand *BaseOp;
2707 TypeSize WidthN(0, false);
2708 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
2709 WidthN, TRI))
2710 return false;
2711 // The maximum vscale is 16 under AArch64, so return the maximal extent for
2712 // the vector.
2713 Width = LocationSize::precise(WidthN);
2714 BaseOps.push_back(BaseOp);
2715 return true;
2716}
2717
2718std::optional<ExtAddrMode>
2719 AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
2720 const TargetRegisterInfo *TRI) const {
2721 const MachineOperand *Base; // Filled with the base operand of MI.
2722 int64_t Offset; // Filled with the offset of MI.
2723 bool OffsetIsScalable;
2724 if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
2725 return std::nullopt;
2726
2727 if (!Base->isReg())
2728 return std::nullopt;
2729 ExtAddrMode AM;
2730 AM.BaseReg = Base->getReg();
2731 AM.Displacement = Offset;
2732 AM.ScaledReg = 0;
2733 AM.Scale = 0;
2734 return AM;
2735}
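// Example use of getAddrModeFromMemoryOp() above (illustrative sketch; `MemI`
// is an assumed 'ldr x1, [x0, #8]'-style instruction):
//   if (std::optional<ExtAddrMode> AM = TII.getAddrModeFromMemoryOp(MemI, TRI)) {
//     // AM->BaseReg is x0, AM->Displacement == 8, AM->Scale == 0.
//   }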
2736
2737 bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
2738 Register Reg,
2739 const MachineInstr &AddrI,
2740 ExtAddrMode &AM) const {
2741 // Filter out instructions into which we cannot fold.
2742 unsigned NumBytes;
2743 int64_t OffsetScale = 1;
2744 switch (MemI.getOpcode()) {
2745 default:
2746 return false;
2747
2748 case AArch64::LDURQi:
2749 case AArch64::STURQi:
2750 NumBytes = 16;
2751 break;
2752
2753 case AArch64::LDURDi:
2754 case AArch64::STURDi:
2755 case AArch64::LDURXi:
2756 case AArch64::STURXi:
2757 NumBytes = 8;
2758 break;
2759
2760 case AArch64::LDURWi:
2761 case AArch64::LDURSWi:
2762 case AArch64::STURWi:
2763 NumBytes = 4;
2764 break;
2765
2766 case AArch64::LDURHi:
2767 case AArch64::STURHi:
2768 case AArch64::LDURHHi:
2769 case AArch64::STURHHi:
2770 case AArch64::LDURSHXi:
2771 case AArch64::LDURSHWi:
2772 NumBytes = 2;
2773 break;
2774
2775 case AArch64::LDRBroX:
2776 case AArch64::LDRBBroX:
2777 case AArch64::LDRSBXroX:
2778 case AArch64::LDRSBWroX:
2779 case AArch64::STRBroX:
2780 case AArch64::STRBBroX:
2781 case AArch64::LDURBi:
2782 case AArch64::LDURBBi:
2783 case AArch64::LDURSBXi:
2784 case AArch64::LDURSBWi:
2785 case AArch64::STURBi:
2786 case AArch64::STURBBi:
2787 case AArch64::LDRBui:
2788 case AArch64::LDRBBui:
2789 case AArch64::LDRSBXui:
2790 case AArch64::LDRSBWui:
2791 case AArch64::STRBui:
2792 case AArch64::STRBBui:
2793 NumBytes = 1;
2794 break;
2795
2796 case AArch64::LDRQroX:
2797 case AArch64::STRQroX:
2798 case AArch64::LDRQui:
2799 case AArch64::STRQui:
2800 NumBytes = 16;
2801 OffsetScale = 16;
2802 break;
2803
2804 case AArch64::LDRDroX:
2805 case AArch64::STRDroX:
2806 case AArch64::LDRXroX:
2807 case AArch64::STRXroX:
2808 case AArch64::LDRDui:
2809 case AArch64::STRDui:
2810 case AArch64::LDRXui:
2811 case AArch64::STRXui:
2812 NumBytes = 8;
2813 OffsetScale = 8;
2814 break;
2815
2816 case AArch64::LDRWroX:
2817 case AArch64::LDRSWroX:
2818 case AArch64::STRWroX:
2819 case AArch64::LDRWui:
2820 case AArch64::LDRSWui:
2821 case AArch64::STRWui:
2822 NumBytes = 4;
2823 OffsetScale = 4;
2824 break;
2825
2826 case AArch64::LDRHroX:
2827 case AArch64::STRHroX:
2828 case AArch64::LDRHHroX:
2829 case AArch64::STRHHroX:
2830 case AArch64::LDRSHXroX:
2831 case AArch64::LDRSHWroX:
2832 case AArch64::LDRHui:
2833 case AArch64::STRHui:
2834 case AArch64::LDRHHui:
2835 case AArch64::STRHHui:
2836 case AArch64::LDRSHXui:
2837 case AArch64::LDRSHWui:
2838 NumBytes = 2;
2839 OffsetScale = 2;
2840 break;
2841 }
2842
2843 // Check the fold operand is not the loaded/stored value.
2844 const MachineOperand &BaseRegOp = MemI.getOperand(0);
2845 if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
2846 return false;
2847
2848 // Handle memory instructions with a [Reg, Reg] addressing mode.
2849 if (MemI.getOperand(2).isReg()) {
2850 // Bail if the addressing mode already includes extension of the offset
2851 // register.
2852 if (MemI.getOperand(3).getImm())
2853 return false;
2854
2855 // Check if we actually have a scaled offset.
2856 if (MemI.getOperand(4).getImm() == 0)
2857 OffsetScale = 1;
2858
2859 // If the address instruction is folded into the base register, then the
2860 // addressing mode must not have a scale, so that we can simply swap the base
2861 // and the offset registers.
2862 if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
2863 return false;
2864
2865 switch (AddrI.getOpcode()) {
2866 default:
2867 return false;
2868
2869 case AArch64::SBFMXri:
2870 // sxtw Xa, Wm
2871 // ldr Xd, [Xn, Xa, lsl #N]
2872 // ->
2873 // ldr Xd, [Xn, Wm, sxtw #N]
2874 if (AddrI.getOperand(2).getImm() != 0 ||
2875 AddrI.getOperand(3).getImm() != 31)
2876 return false;
2877
2878 AM.BaseReg = MemI.getOperand(1).getReg();
2879 if (AM.BaseReg == Reg)
2880 AM.BaseReg = MemI.getOperand(2).getReg();
2881 AM.ScaledReg = AddrI.getOperand(1).getReg();
2882 AM.Scale = OffsetScale;
2883 AM.Displacement = 0;
2884 AM.Form = ExtAddrMode::Formula::SExtScaledReg;
2885 return true;
2886
2887 case TargetOpcode::SUBREG_TO_REG: {
2888 // mov Wa, Wm
2889 // ldr Xd, [Xn, Xa, lsl #N]
2890 // ->
2891 // ldr Xd, [Xn, Wm, uxtw #N]
2892
2893 // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
2894 if (AddrI.getOperand(1).getImm() != 0 ||
2895 AddrI.getOperand(3).getImm() != AArch64::sub_32)
2896 return false;
2897
2898 const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
2899 Register OffsetReg = AddrI.getOperand(2).getReg();
2900 if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
2901 return false;
2902
2903 const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
2904 if (DefMI.getOpcode() != AArch64::ORRWrs ||
2905 DefMI.getOperand(1).getReg() != AArch64::WZR ||
2906 DefMI.getOperand(3).getImm() != 0)
2907 return false;
2908
2909 AM.BaseReg = MemI.getOperand(1).getReg();
2910 if (AM.BaseReg == Reg)
2911 AM.BaseReg = MemI.getOperand(2).getReg();
2912 AM.ScaledReg = DefMI.getOperand(2).getReg();
2913 AM.Scale = OffsetScale;
2914 AM.Displacement = 0;
2915 AM.Form = ExtAddrMode::Formula::ZExtScaledReg;
2916 return true;
2917 }
2918 }
2919 }
2920
2921 // Handle memory instructions with a [Reg, #Imm] addressing mode.
2922
2923 // Check we are not breaking a potential conversion to an LDP.
2924 auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
2925 int64_t NewOffset) -> bool {
2926 int64_t MinOffset, MaxOffset;
2927 switch (NumBytes) {
2928 default:
2929 return true;
2930 case 4:
2931 MinOffset = -256;
2932 MaxOffset = 252;
2933 break;
2934 case 8:
2935 MinOffset = -512;
2936 MaxOffset = 504;
2937 break;
2938 case 16:
2939 MinOffset = -1024;
2940 MaxOffset = 1008;
2941 break;
2942 }
2943 return OldOffset < MinOffset || OldOffset > MaxOffset ||
2944 (NewOffset >= MinOffset && NewOffset <= MaxOffset);
2945 };
2946 auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
2947 int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
2948 int64_t NewOffset = OldOffset + Disp;
2949 if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
2950 return false;
2951 // If the old offset would fit into an LDP, but the new offset wouldn't,
2952 // bail out.
2953 if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
2954 return false;
2955 AM.BaseReg = AddrI.getOperand(1).getReg();
2956 AM.ScaledReg = 0;
2957 AM.Scale = 0;
2958 AM.Displacement = NewOffset;
2959 AM.Form = ExtAddrMode::Formula::Basic;
2960 return true;
2961 };
2962
2963 auto canFoldAddRegIntoAddrMode =
2964 [&](int64_t Scale,
2965 ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool {
2966 if (MemI.getOperand(2).getImm() != 0)
2967 return false;
2968 if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
2969 return false;
2970 AM.BaseReg = AddrI.getOperand(1).getReg();
2971 AM.ScaledReg = AddrI.getOperand(2).getReg();
2972 AM.Scale = Scale;
2973 AM.Displacement = 0;
2974 AM.Form = Form;
2975 return true;
2976 };
2977
2978 auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
2979 unsigned Opcode = MemI.getOpcode();
2980 return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
2981 Subtarget.isSTRQroSlow();
2982 };
2983
2984 int64_t Disp = 0;
2985 const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
2986 switch (AddrI.getOpcode()) {
2987 default:
2988 return false;
2989
2990 case AArch64::ADDXri:
2991 // add Xa, Xn, #N
2992 // ldr Xd, [Xa, #M]
2993 // ->
2994 // ldr Xd, [Xn, #N'+M]
2995 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
2996 return canFoldAddSubImmIntoAddrMode(Disp);
2997
2998 case AArch64::SUBXri:
2999 // sub Xa, Xn, #N
3000 // ldr Xd, [Xa, #M]
3001 // ->
3002 // ldr Xd, [Xn, #N'+M]
3003 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3004 return canFoldAddSubImmIntoAddrMode(-Disp);
3005
3006 case AArch64::ADDXrs: {
3007 // add Xa, Xn, Xm, lsl #N
3008 // ldr Xd, [Xa]
3009 // ->
3010 // ldr Xd, [Xn, Xm, lsl #N]
3011
3012 // Don't fold the add if the result would be slower, unless optimising for
3013 // size.
3014 unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3015 if (AArch64_AM::getShiftType(Shift) != AArch64_AM::LSL)
3016 return false;
3017 Shift = AArch64_AM::getShiftValue(Shift);
3018 if (!OptSize) {
3019 if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
3020 return false;
3021 if (avoidSlowSTRQ(MemI))
3022 return false;
3023 }
3024 return canFoldAddRegIntoAddrMode(1ULL << Shift);
3025 }
3026
3027 case AArch64::ADDXrr:
3028 // add Xa, Xn, Xm
3029 // ldr Xd, [Xa]
3030 // ->
3031 // ldr Xd, [Xn, Xm, lsl #0]
3032
3033 // Don't fold the add if the result would be slower, unless optimising for
3034 // size.
3035 if (!OptSize && avoidSlowSTRQ(MemI))
3036 return false;
3037 return canFoldAddRegIntoAddrMode(1);
3038
3039 case AArch64::ADDXrx:
3040 // add Xa, Xn, Wm, {s,u}xtw #N
3041 // ldr Xd, [Xa]
3042 // ->
3043 // ldr Xd, [Xn, Wm, {s,u}xtw #N]
3044
3045 // Don't fold the add if the result would be slower, unless optimising for
3046 // size.
3047 if (!OptSize && avoidSlowSTRQ(MemI))
3048 return false;
3049
3050 // Can fold only sign-/zero-extend of a word.
3051 unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3052 AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
3053 if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
3054 return false;
3055
3056 return canFoldAddRegIntoAddrMode(
3057 1ULL << AArch64_AM::getArithShiftValue(Imm),
3058 (Extend == AArch64_AM::SXTW) ? ExtAddrMode::Formula::SExtScaledReg
3059 : ExtAddrMode::Formula::ZExtScaledReg);
3060 }
3061}
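// Example use of canFoldIntoAddrMode() above together with emitLdStWithAddr()
// below (illustrative sketch; `AddrI` is an assumed 'add x8, x0, #16' feeding
// the base of `MemI`, an assumed 'ldr x1, [x8]'):
//   ExtAddrMode AM;
//   if (TII.canFoldIntoAddrMode(MemI, /*Reg=*/AddrI.getOperand(0).getReg(),
//                               AddrI, AM))
//     TII.emitLdStWithAddr(MemI, AM);  // emits the equivalent 'ldur x1, [x0, #16]'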
3062
3063// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3064// return the opcode of an instruction performing the same operation, but using
3065// the [Reg, Reg] addressing mode.
3066static unsigned regOffsetOpcode(unsigned Opcode) {
3067 switch (Opcode) {
3068 default:
3069 llvm_unreachable("Address folding not implemented for instruction");
3070
3071 case AArch64::LDURQi:
3072 case AArch64::LDRQui:
3073 return AArch64::LDRQroX;
3074 case AArch64::STURQi:
3075 case AArch64::STRQui:
3076 return AArch64::STRQroX;
3077 case AArch64::LDURDi:
3078 case AArch64::LDRDui:
3079 return AArch64::LDRDroX;
3080 case AArch64::STURDi:
3081 case AArch64::STRDui:
3082 return AArch64::STRDroX;
3083 case AArch64::LDURXi:
3084 case AArch64::LDRXui:
3085 return AArch64::LDRXroX;
3086 case AArch64::STURXi:
3087 case AArch64::STRXui:
3088 return AArch64::STRXroX;
3089 case AArch64::LDURWi:
3090 case AArch64::LDRWui:
3091 return AArch64::LDRWroX;
3092 case AArch64::LDURSWi:
3093 case AArch64::LDRSWui:
3094 return AArch64::LDRSWroX;
3095 case AArch64::STURWi:
3096 case AArch64::STRWui:
3097 return AArch64::STRWroX;
3098 case AArch64::LDURHi:
3099 case AArch64::LDRHui:
3100 return AArch64::LDRHroX;
3101 case AArch64::STURHi:
3102 case AArch64::STRHui:
3103 return AArch64::STRHroX;
3104 case AArch64::LDURHHi:
3105 case AArch64::LDRHHui:
3106 return AArch64::LDRHHroX;
3107 case AArch64::STURHHi:
3108 case AArch64::STRHHui:
3109 return AArch64::STRHHroX;
3110 case AArch64::LDURSHXi:
3111 case AArch64::LDRSHXui:
3112 return AArch64::LDRSHXroX;
3113 case AArch64::LDURSHWi:
3114 case AArch64::LDRSHWui:
3115 return AArch64::LDRSHWroX;
3116 case AArch64::LDURBi:
3117 case AArch64::LDRBui:
3118 return AArch64::LDRBroX;
3119 case AArch64::LDURBBi:
3120 case AArch64::LDRBBui:
3121 return AArch64::LDRBBroX;
3122 case AArch64::LDURSBXi:
3123 case AArch64::LDRSBXui:
3124 return AArch64::LDRSBXroX;
3125 case AArch64::LDURSBWi:
3126 case AArch64::LDRSBWui:
3127 return AArch64::LDRSBWroX;
3128 case AArch64::STURBi:
3129 case AArch64::STRBui:
3130 return AArch64::STRBroX;
3131 case AArch64::STURBBi:
3132 case AArch64::STRBBui:
3133 return AArch64::STRBBroX;
3134 }
3135}
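// For example (mappings taken from the switch above):
//   regOffsetOpcode(AArch64::LDRXui)  == AArch64::LDRXroX   // ldr xt, [xn, xm]
//   regOffsetOpcode(AArch64::STURBBi) == AArch64::STRBBroX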
3136
3137// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3138// the opcode of an instruction performing the same operation, but using the
3139// [Reg, #Imm] addressing mode with scaled offset.
3140unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
3141 switch (Opcode) {
3142 default:
3143 llvm_unreachable("Address folding not implemented for instruction");
3144
3145 case AArch64::LDURQi:
3146 Scale = 16;
3147 return AArch64::LDRQui;
3148 case AArch64::STURQi:
3149 Scale = 16;
3150 return AArch64::STRQui;
3151 case AArch64::LDURDi:
3152 Scale = 8;
3153 return AArch64::LDRDui;
3154 case AArch64::STURDi:
3155 Scale = 8;
3156 return AArch64::STRDui;
3157 case AArch64::LDURXi:
3158 Scale = 8;
3159 return AArch64::LDRXui;
3160 case AArch64::STURXi:
3161 Scale = 8;
3162 return AArch64::STRXui;
3163 case AArch64::LDURWi:
3164 Scale = 4;
3165 return AArch64::LDRWui;
3166 case AArch64::LDURSWi:
3167 Scale = 4;
3168 return AArch64::LDRSWui;
3169 case AArch64::STURWi:
3170 Scale = 4;
3171 return AArch64::STRWui;
3172 case AArch64::LDURHi:
3173 Scale = 2;
3174 return AArch64::LDRHui;
3175 case AArch64::STURHi:
3176 Scale = 2;
3177 return AArch64::STRHui;
3178 case AArch64::LDURHHi:
3179 Scale = 2;
3180 return AArch64::LDRHHui;
3181 case AArch64::STURHHi:
3182 Scale = 2;
3183 return AArch64::STRHHui;
3184 case AArch64::LDURSHXi:
3185 Scale = 2;
3186 return AArch64::LDRSHXui;
3187 case AArch64::LDURSHWi:
3188 Scale = 2;
3189 return AArch64::LDRSHWui;
3190 case AArch64::LDURBi:
3191 Scale = 1;
3192 return AArch64::LDRBui;
3193 case AArch64::LDURBBi:
3194 Scale = 1;
3195 return AArch64::LDRBBui;
3196 case AArch64::LDURSBXi:
3197 Scale = 1;
3198 return AArch64::LDRSBXui;
3199 case AArch64::LDURSBWi:
3200 Scale = 1;
3201 return AArch64::LDRSBWui;
3202 case AArch64::STURBi:
3203 Scale = 1;
3204 return AArch64::STRBui;
3205 case AArch64::STURBBi:
3206 Scale = 1;
3207 return AArch64::STRBBui;
3208 case AArch64::LDRQui:
3209 case AArch64::STRQui:
3210 Scale = 16;
3211 return Opcode;
3212 case AArch64::LDRDui:
3213 case AArch64::STRDui:
3214 case AArch64::LDRXui:
3215 case AArch64::STRXui:
3216 Scale = 8;
3217 return Opcode;
3218 case AArch64::LDRWui:
3219 case AArch64::LDRSWui:
3220 case AArch64::STRWui:
3221 Scale = 4;
3222 return Opcode;
3223 case AArch64::LDRHui:
3224 case AArch64::STRHui:
3225 case AArch64::LDRHHui:
3226 case AArch64::STRHHui:
3227 case AArch64::LDRSHXui:
3228 case AArch64::LDRSHWui:
3229 Scale = 2;
3230 return Opcode;
3231 case AArch64::LDRBui:
3232 case AArch64::LDRBBui:
3233 case AArch64::LDRSBXui:
3234 case AArch64::LDRSBWui:
3235 case AArch64::STRBui:
3236 case AArch64::STRBBui:
3237 Scale = 1;
3238 return Opcode;
3239 }
3240}
3241
3242// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3243// the opcode of an instruction performing the same operation, but using the
3244// [Reg, #Imm] addressing mode with unscaled offset.
3245unsigned unscaledOffsetOpcode(unsigned Opcode) {
3246 switch (Opcode) {
3247 default:
3248 llvm_unreachable("Address folding not implemented for instruction");
3249
3250 case AArch64::LDURQi:
3251 case AArch64::STURQi:
3252 case AArch64::LDURDi:
3253 case AArch64::STURDi:
3254 case AArch64::LDURXi:
3255 case AArch64::STURXi:
3256 case AArch64::LDURWi:
3257 case AArch64::LDURSWi:
3258 case AArch64::STURWi:
3259 case AArch64::LDURHi:
3260 case AArch64::STURHi:
3261 case AArch64::LDURHHi:
3262 case AArch64::STURHHi:
3263 case AArch64::LDURSHXi:
3264 case AArch64::LDURSHWi:
3265 case AArch64::LDURBi:
3266 case AArch64::STURBi:
3267 case AArch64::LDURBBi:
3268 case AArch64::STURBBi:
3269 case AArch64::LDURSBWi:
3270 case AArch64::LDURSBXi:
3271 return Opcode;
3272 case AArch64::LDRQui:
3273 return AArch64::LDURQi;
3274 case AArch64::STRQui:
3275 return AArch64::STURQi;
3276 case AArch64::LDRDui:
3277 return AArch64::LDURDi;
3278 case AArch64::STRDui:
3279 return AArch64::STURDi;
3280 case AArch64::LDRXui:
3281 return AArch64::LDURXi;
3282 case AArch64::STRXui:
3283 return AArch64::STURXi;
3284 case AArch64::LDRWui:
3285 return AArch64::LDURWi;
3286 case AArch64::LDRSWui:
3287 return AArch64::LDURSWi;
3288 case AArch64::STRWui:
3289 return AArch64::STURWi;
3290 case AArch64::LDRHui:
3291 return AArch64::LDURHi;
3292 case AArch64::STRHui:
3293 return AArch64::STURHi;
3294 case AArch64::LDRHHui:
3295 return AArch64::LDURHHi;
3296 case AArch64::STRHHui:
3297 return AArch64::STURHHi;
3298 case AArch64::LDRSHXui:
3299 return AArch64::LDURSHXi;
3300 case AArch64::LDRSHWui:
3301 return AArch64::LDURSHWi;
3302 case AArch64::LDRBBui:
3303 return AArch64::LDURBBi;
3304 case AArch64::LDRBui:
3305 return AArch64::LDURBi;
3306 case AArch64::STRBBui:
3307 return AArch64::STURBBi;
3308 case AArch64::STRBui:
3309 return AArch64::STURBi;
3310 case AArch64::LDRSBWui:
3311 return AArch64::LDURSBWi;
3312 case AArch64::LDRSBXui:
3313 return AArch64::LDURSBXi;
3314 }
3315}
3316
3317// Given the opcode of a memory load/store instruction, return the opcode of an
3318// instruction performing the same operation, but using
3319// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
3320// offset register.
3321static unsigned offsetExtendOpcode(unsigned Opcode) {
3322 switch (Opcode) {
3323 default:
3324 llvm_unreachable("Address folding not implemented for instruction");
3325
3326 case AArch64::LDRQroX:
3327 case AArch64::LDURQi:
3328 case AArch64::LDRQui:
3329 return AArch64::LDRQroW;
3330 case AArch64::STRQroX:
3331 case AArch64::STURQi:
3332 case AArch64::STRQui:
3333 return AArch64::STRQroW;
3334 case AArch64::LDRDroX:
3335 case AArch64::LDURDi:
3336 case AArch64::LDRDui:
3337 return AArch64::LDRDroW;
3338 case AArch64::STRDroX:
3339 case AArch64::STURDi:
3340 case AArch64::STRDui:
3341 return AArch64::STRDroW;
3342 case AArch64::LDRXroX:
3343 case AArch64::LDURXi:
3344 case AArch64::LDRXui:
3345 return AArch64::LDRXroW;
3346 case AArch64::STRXroX:
3347 case AArch64::STURXi:
3348 case AArch64::STRXui:
3349 return AArch64::STRXroW;
3350 case AArch64::LDRWroX:
3351 case AArch64::LDURWi:
3352 case AArch64::LDRWui:
3353 return AArch64::LDRWroW;
3354 case AArch64::LDRSWroX:
3355 case AArch64::LDURSWi:
3356 case AArch64::LDRSWui:
3357 return AArch64::LDRSWroW;
3358 case AArch64::STRWroX:
3359 case AArch64::STURWi:
3360 case AArch64::STRWui:
3361 return AArch64::STRWroW;
3362 case AArch64::LDRHroX:
3363 case AArch64::LDURHi:
3364 case AArch64::LDRHui:
3365 return AArch64::LDRHroW;
3366 case AArch64::STRHroX:
3367 case AArch64::STURHi:
3368 case AArch64::STRHui:
3369 return AArch64::STRHroW;
3370 case AArch64::LDRHHroX:
3371 case AArch64::LDURHHi:
3372 case AArch64::LDRHHui:
3373 return AArch64::LDRHHroW;
3374 case AArch64::STRHHroX:
3375 case AArch64::STURHHi:
3376 case AArch64::STRHHui:
3377 return AArch64::STRHHroW;
3378 case AArch64::LDRSHXroX:
3379 case AArch64::LDURSHXi:
3380 case AArch64::LDRSHXui:
3381 return AArch64::LDRSHXroW;
3382 case AArch64::LDRSHWroX:
3383 case AArch64::LDURSHWi:
3384 case AArch64::LDRSHWui:
3385 return AArch64::LDRSHWroW;
3386 case AArch64::LDRBroX:
3387 case AArch64::LDURBi:
3388 case AArch64::LDRBui:
3389 return AArch64::LDRBroW;
3390 case AArch64::LDRBBroX:
3391 case AArch64::LDURBBi:
3392 case AArch64::LDRBBui:
3393 return AArch64::LDRBBroW;
3394 case AArch64::LDRSBXroX:
3395 case AArch64::LDURSBXi:
3396 case AArch64::LDRSBXui:
3397 return AArch64::LDRSBXroW;
3398 case AArch64::LDRSBWroX:
3399 case AArch64::LDURSBWi:
3400 case AArch64::LDRSBWui:
3401 return AArch64::LDRSBWroW;
3402 case AArch64::STRBroX:
3403 case AArch64::STURBi:
3404 case AArch64::STRBui:
3405 return AArch64::STRBroW;
3406 case AArch64::STRBBroX:
3407 case AArch64::STURBBi:
3408 case AArch64::STRBBui:
3409 return AArch64::STRBBroW;
3410 }
3411}
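// For example (mappings taken from the switch above):
//   offsetExtendOpcode(AArch64::LDRXroX) == AArch64::LDRXroW  // ldr xt, [xn, wm, {s,u}xtw]
//   offsetExtendOpcode(AArch64::STRQui)  == AArch64::STRQroW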
3412
3413 MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
3414 const ExtAddrMode &AM) const {
3415
3416 const DebugLoc &DL = MemI.getDebugLoc();
3417 MachineBasicBlock &MBB = *MemI.getParent();
3418 MachineRegisterInfo &MRI = MemI.getMF()->getRegInfo();
3419
3420 if (AM.Form == ExtAddrMode::Formula::Basic) {
3421 if (AM.ScaledReg) {
3422 // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
3423 unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
3424 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3425 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3426 .addReg(MemI.getOperand(0).getReg(),
3427 MemI.mayLoad() ? RegState::Define : 0)
3428 .addReg(AM.BaseReg)
3429 .addReg(AM.ScaledReg)
3430 .addImm(0)
3431 .addImm(AM.Scale > 1)
3432 .setMemRefs(MemI.memoperands())
3433 .setMIFlags(MemI.getFlags());
3434 return B.getInstr();
3435 }
3436
3437 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
3438 "Addressing mode not supported for folding");
3439
3440 // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
3441 unsigned Scale = 1;
3442 unsigned Opcode = MemI.getOpcode();
3443 if (isInt<9>(AM.Displacement))
3444 Opcode = unscaledOffsetOpcode(Opcode);
3445 else
3446 Opcode = scaledOffsetOpcode(Opcode, Scale);
3447
3448 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3449 .addReg(MemI.getOperand(0).getReg(),
3450 MemI.mayLoad() ? RegState::Define : 0)
3451 .addReg(AM.BaseReg)
3452 .addImm(AM.Displacement / Scale)
3453 .setMemRefs(MemI.memoperands())
3454 .setMIFlags(MemI.getFlags());
3455 return B.getInstr();
3456 }
3457
3458 if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
3459 AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
3460 // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
3461 assert(AM.ScaledReg && !AM.Displacement &&
3462 "Address offset can be a register or an immediate, but not both");
3463 unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
3464 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3465 // Make sure the offset register is in the correct register class.
3466 Register OffsetReg = AM.ScaledReg;
3467 const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
3468 if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
3469 OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3470 BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
3471 .addReg(AM.ScaledReg, 0, AArch64::sub_32);
3472 }
3473 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3474 .addReg(MemI.getOperand(0).getReg(),
3475 MemI.mayLoad() ? RegState::Define : 0)
3476 .addReg(AM.BaseReg)
3477 .addReg(OffsetReg)
3478 .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
3479 .addImm(AM.Scale != 1)
3480 .setMemRefs(MemI.memoperands())
3481 .setMIFlags(MemI.getFlags());
3482
3483 return B.getInstr();
3484 }
3485
3487 "Function must not be called with an addressing mode it can't handle");
3488}
3489
3490 bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
3491 const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
3492 bool &OffsetIsScalable, TypeSize &Width,
3493 const TargetRegisterInfo *TRI) const {
3494 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
3495 // Handle only loads/stores with base register followed by immediate offset.
3496 if (LdSt.getNumExplicitOperands() == 3) {
3497 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
3498 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
3499 !LdSt.getOperand(2).isImm())
3500 return false;
3501 } else if (LdSt.getNumExplicitOperands() == 4) {
3502 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
3503 if (!LdSt.getOperand(1).isReg() ||
3504 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
3505 !LdSt.getOperand(3).isImm())
3506 return false;
3507 } else
3508 return false;
3509
3510 // Get the scaling factor for the instruction and set the width for the
3511 // instruction.
3512 TypeSize Scale(0U, false);
3513 int64_t Dummy1, Dummy2;
3514
3515 // If this returns false, then it's an instruction we don't want to handle.
3516 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
3517 return false;
3518
3519 // Compute the offset. Offset is calculated as the immediate operand
3520 // multiplied by the scaling factor. Unscaled instructions have scaling factor
3521 // set to 1.
3522 if (LdSt.getNumExplicitOperands() == 3) {
3523 BaseOp = &LdSt.getOperand(1);
3524 Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
3525 } else {
3526 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
3527 BaseOp = &LdSt.getOperand(2);
3528 Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
3529 }
3530 OffsetIsScalable = Scale.isScalable();
3531
3532 if (!BaseOp->isReg() && !BaseOp->isFI())
3533 return false;
3534
3535 return true;
3536}
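// Example use of getMemOperandWithOffsetWidth() above (illustrative sketch;
// `LdSt` is an assumed 'ldp x1, x2, [x0, #16]'):
//   const MachineOperand *BaseOp;
//   int64_t Offset;
//   bool OffsetIsScalable;
//   TypeSize Width(0, false);
//   if (TII.getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
//                                        Width, TRI)) {
//     // BaseOp is the x0 operand, Offset == 16 bytes, Width == 16 bytes,
//     // OffsetIsScalable == false.
//   }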
3537
3540 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
3541 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
3542 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
3543 return OfsOp;
3544}
3545
3546bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
3547 TypeSize &Width, int64_t &MinOffset,
3548 int64_t &MaxOffset) {
3549 switch (Opcode) {
3550 // Not a memory operation or something we want to handle.
3551 default:
3552 Scale = TypeSize::getFixed(0);
3553 Width = TypeSize::getFixed(0);
3554 MinOffset = MaxOffset = 0;
3555 return false;
3556 case AArch64::STRWpost:
3557 case AArch64::LDRWpost:
3558 Width = TypeSize::getFixed(32);
3559 Scale = TypeSize::getFixed(4);
3560 MinOffset = -256;
3561 MaxOffset = 255;
3562 break;
3563 case AArch64::LDURQi:
3564 case AArch64::STURQi:
3565 Width = TypeSize::getFixed(16);
3566 Scale = TypeSize::getFixed(1);
3567 MinOffset = -256;
3568 MaxOffset = 255;
3569 break;
3570 case AArch64::PRFUMi:
3571 case AArch64::LDURXi:
3572 case AArch64::LDURDi:
3573 case AArch64::LDAPURXi:
3574 case AArch64::STURXi:
3575 case AArch64::STURDi:
3576 case AArch64::STLURXi:
3577 Width = TypeSize::getFixed(8);
3578 Scale = TypeSize::getFixed(1);
3579 MinOffset = -256;
3580 MaxOffset = 255;
3581 break;
3582 case AArch64::LDURWi:
3583 case AArch64::LDURSi:
3584 case AArch64::LDURSWi:
3585 case AArch64::LDAPURi:
3586 case AArch64::LDAPURSWi:
3587 case AArch64::STURWi:
3588 case AArch64::STURSi:
3589 case AArch64::STLURWi:
3590 Width = TypeSize::getFixed(4);
3591 Scale = TypeSize::getFixed(1);
3592 MinOffset = -256;
3593 MaxOffset = 255;
3594 break;
3595 case AArch64::LDURHi:
3596 case AArch64::LDURHHi:
3597 case AArch64::LDURSHXi:
3598 case AArch64::LDURSHWi:
3599 case AArch64::LDAPURHi:
3600 case AArch64::LDAPURSHWi:
3601 case AArch64::LDAPURSHXi:
3602 case AArch64::STURHi:
3603 case AArch64::STURHHi:
3604 case AArch64::STLURHi:
3605 Width = TypeSize::getFixed(2);
3606 Scale = TypeSize::getFixed(1);
3607 MinOffset = -256;
3608 MaxOffset = 255;
3609 break;
3610 case AArch64::LDURBi:
3611 case AArch64::LDURBBi:
3612 case AArch64::LDURSBXi:
3613 case AArch64::LDURSBWi:
3614 case AArch64::LDAPURBi:
3615 case AArch64::LDAPURSBWi:
3616 case AArch64::LDAPURSBXi:
3617 case AArch64::STURBi:
3618 case AArch64::STURBBi:
3619 case AArch64::STLURBi:
3620 Width = TypeSize::getFixed(1);
3621 Scale = TypeSize::getFixed(1);
3622 MinOffset = -256;
3623 MaxOffset = 255;
3624 break;
3625 case AArch64::LDPQi:
3626 case AArch64::LDNPQi:
3627 case AArch64::STPQi:
3628 case AArch64::STNPQi:
3629 Scale = TypeSize::getFixed(16);
3630 Width = TypeSize::getFixed(32);
3631 MinOffset = -64;
3632 MaxOffset = 63;
3633 break;
3634 case AArch64::LDRQui:
3635 case AArch64::STRQui:
3636 Scale = TypeSize::getFixed(16);
3637 Width = TypeSize::getFixed(16);
3638 MinOffset = 0;
3639 MaxOffset = 4095;
3640 break;
3641 case AArch64::LDPXi:
3642 case AArch64::LDPDi:
3643 case AArch64::LDNPXi:
3644 case AArch64::LDNPDi:
3645 case AArch64::STPXi:
3646 case AArch64::STPDi:
3647 case AArch64::STNPXi:
3648 case AArch64::STNPDi:
3649 Scale = TypeSize::getFixed(8);
3650 Width = TypeSize::getFixed(16);
3651 MinOffset = -64;
3652 MaxOffset = 63;
3653 break;
3654 case AArch64::PRFMui:
3655 case AArch64::LDRXui:
3656 case AArch64::LDRDui:
3657 case AArch64::STRXui:
3658 case AArch64::STRDui:
3659 Scale = TypeSize::getFixed(8);
3660 Width = TypeSize::getFixed(8);
3661 MinOffset = 0;
3662 MaxOffset = 4095;
3663 break;
3664 case AArch64::StoreSwiftAsyncContext:
3665 // Store is an STRXui, but there might be an ADDXri in the expansion too.
3666 Scale = TypeSize::getFixed(1);
3667 Width = TypeSize::getFixed(8);
3668 MinOffset = 0;
3669 MaxOffset = 4095;
3670 break;
3671 case AArch64::LDPWi:
3672 case AArch64::LDPSi:
3673 case AArch64::LDNPWi:
3674 case AArch64::LDNPSi:
3675 case AArch64::STPWi:
3676 case AArch64::STPSi:
3677 case AArch64::STNPWi:
3678 case AArch64::STNPSi:
3679 Scale = TypeSize::getFixed(4);
3680 Width = TypeSize::getFixed(8);
3681 MinOffset = -64;
3682 MaxOffset = 63;
3683 break;
3684 case AArch64::LDRWui:
3685 case AArch64::LDRSui:
3686 case AArch64::LDRSWui:
3687 case AArch64::STRWui:
3688 case AArch64::STRSui:
3689 Scale = TypeSize::getFixed(4);
3690 Width = TypeSize::getFixed(4);
3691 MinOffset = 0;
3692 MaxOffset = 4095;
3693 break;
3694 case AArch64::LDRHui:
3695 case AArch64::LDRHHui:
3696 case AArch64::LDRSHWui:
3697 case AArch64::LDRSHXui:
3698 case AArch64::STRHui:
3699 case AArch64::STRHHui:
3700 Scale = TypeSize::getFixed(2);
3701 Width = TypeSize::getFixed(2);
3702 MinOffset = 0;
3703 MaxOffset = 4095;
3704 break;
3705 case AArch64::LDRBui:
3706 case AArch64::LDRBBui:
3707 case AArch64::LDRSBWui:
3708 case AArch64::LDRSBXui:
3709 case AArch64::STRBui:
3710 case AArch64::STRBBui:
3711 Scale = TypeSize::getFixed(1);
3712 Width = TypeSize::getFixed(1);
3713 MinOffset = 0;
3714 MaxOffset = 4095;
3715 break;
3716 case AArch64::STPXpre:
3717 case AArch64::LDPXpost:
3718 case AArch64::STPDpre:
3719 case AArch64::LDPDpost:
3720 Scale = TypeSize::getFixed(8);
3721 Width = TypeSize::getFixed(8);
3722 MinOffset = -512;
3723 MaxOffset = 504;
3724 break;
3725 case AArch64::STPQpre:
3726 case AArch64::LDPQpost:
3727 Scale = TypeSize::getFixed(16);
3728 Width = TypeSize::getFixed(16);
3729 MinOffset = -1024;
3730 MaxOffset = 1008;
3731 break;
3732 case AArch64::STRXpre:
3733 case AArch64::STRDpre:
3734 case AArch64::LDRXpost:
3735 case AArch64::LDRDpost:
3736 Scale = TypeSize::getFixed(1);
3737 Width = TypeSize::getFixed(8);
3738 MinOffset = -256;
3739 MaxOffset = 255;
3740 break;
3741 case AArch64::STRQpre:
3742 case AArch64::LDRQpost:
3743 Scale = TypeSize::getFixed(1);
3744 Width = TypeSize::getFixed(16);
3745 MinOffset = -256;
3746 MaxOffset = 255;
3747 break;
3748 case AArch64::ADDG:
3749 Scale = TypeSize::getFixed(16);
3750 Width = TypeSize::getFixed(0);
3751 MinOffset = 0;
3752 MaxOffset = 63;
3753 break;
3754 case AArch64::TAGPstack:
3755 Scale = TypeSize::getFixed(16);
3756 Width = TypeSize::getFixed(0);
3757 // TAGP with a negative offset turns into SUBP, which has a maximum offset
3758 // of 63 (not 64!).
3759 MinOffset = -63;
3760 MaxOffset = 63;
3761 break;
3762 case AArch64::LDG:
3763 case AArch64::STGi:
3764 case AArch64::STZGi:
3765 Scale = TypeSize::getFixed(16);
3766 Width = TypeSize::getFixed(16);
3767 MinOffset = -256;
3768 MaxOffset = 255;
3769 break;
3770 case AArch64::STR_ZZZZXI:
3771 case AArch64::LDR_ZZZZXI:
3772 Scale = TypeSize::getScalable(16);
3773 Width = TypeSize::getScalable(16 * 4);
3774 MinOffset = -256;
3775 MaxOffset = 252;
3776 break;
3777 case AArch64::STR_ZZZXI:
3778 case AArch64::LDR_ZZZXI:
3779 Scale = TypeSize::getScalable(16);
3780 Width = TypeSize::getScalable(16 * 3);
3781 MinOffset = -256;
3782 MaxOffset = 253;
3783 break;
3784 case AArch64::STR_ZZXI:
3785 case AArch64::LDR_ZZXI:
3786 Scale = TypeSize::getScalable(16);
3787 Width = TypeSize::getScalable(16 * 2);
3788 MinOffset = -256;
3789 MaxOffset = 254;
3790 break;
3791 case AArch64::LDR_PXI:
3792 case AArch64::STR_PXI:
3793 Scale = TypeSize::getScalable(2);
3794 Width = TypeSize::getScalable(2);
3795 MinOffset = -256;
3796 MaxOffset = 255;
3797 break;
3798 case AArch64::LDR_PPXI:
3799 case AArch64::STR_PPXI:
3800 Scale = TypeSize::getScalable(2);
3801 Width = TypeSize::getScalable(2 * 2);
3802 MinOffset = -256;
3803 MaxOffset = 254;
3804 break;
3805 case AArch64::LDR_ZXI:
3806 case AArch64::STR_ZXI:
3807 Scale = TypeSize::getScalable(16);
3808 Width = TypeSize::getScalable(16);
3809 MinOffset = -256;
3810 MaxOffset = 255;
3811 break;
3812 case AArch64::LD1B_IMM:
3813 case AArch64::LD1H_IMM:
3814 case AArch64::LD1W_IMM:
3815 case AArch64::LD1D_IMM:
3816 case AArch64::LDNT1B_ZRI:
3817 case AArch64::LDNT1H_ZRI:
3818 case AArch64::LDNT1W_ZRI:
3819 case AArch64::LDNT1D_ZRI:
3820 case AArch64::ST1B_IMM:
3821 case AArch64::ST1H_IMM:
3822 case AArch64::ST1W_IMM:
3823 case AArch64::ST1D_IMM:
3824 case AArch64::STNT1B_ZRI:
3825 case AArch64::STNT1H_ZRI:
3826 case AArch64::STNT1W_ZRI:
3827 case AArch64::STNT1D_ZRI:
3828 case AArch64::LDNF1B_IMM:
3829 case AArch64::LDNF1H_IMM:
3830 case AArch64::LDNF1W_IMM:
3831 case AArch64::LDNF1D_IMM:
3832    // A full vector's worth of data
3833 // Width = mbytes * elements
3834 Scale = TypeSize::getScalable(16);
3835 Width = TypeSize::getScalable(16);
3836 MinOffset = -8;
3837 MaxOffset = 7;
3838 break;
3839 case AArch64::LD2B_IMM:
3840 case AArch64::LD2H_IMM:
3841 case AArch64::LD2W_IMM:
3842 case AArch64::LD2D_IMM:
3843 case AArch64::ST2B_IMM:
3844 case AArch64::ST2H_IMM:
3845 case AArch64::ST2W_IMM:
3846 case AArch64::ST2D_IMM:
3847 Scale = TypeSize::getScalable(32);
3848 Width = TypeSize::getScalable(16 * 2);
3849 MinOffset = -8;
3850 MaxOffset = 7;
3851 break;
3852 case AArch64::LD3B_IMM:
3853 case AArch64::LD3H_IMM:
3854 case AArch64::LD3W_IMM:
3855 case AArch64::LD3D_IMM:
3856 case AArch64::ST3B_IMM:
3857 case AArch64::ST3H_IMM:
3858 case AArch64::ST3W_IMM:
3859 case AArch64::ST3D_IMM:
3860 Scale = TypeSize::getScalable(48);
3861 Width = TypeSize::getScalable(16 * 3);
3862 MinOffset = -8;
3863 MaxOffset = 7;
3864 break;
3865 case AArch64::LD4B_IMM:
3866 case AArch64::LD4H_IMM:
3867 case AArch64::LD4W_IMM:
3868 case AArch64::LD4D_IMM:
3869 case AArch64::ST4B_IMM:
3870 case AArch64::ST4H_IMM:
3871 case AArch64::ST4W_IMM:
3872 case AArch64::ST4D_IMM:
3873 Scale = TypeSize::getScalable(64);
3874 Width = TypeSize::getScalable(16 * 4);
3875 MinOffset = -8;
3876 MaxOffset = 7;
3877 break;
3878 case AArch64::LD1B_H_IMM:
3879 case AArch64::LD1SB_H_IMM:
3880 case AArch64::LD1H_S_IMM:
3881 case AArch64::LD1SH_S_IMM:
3882 case AArch64::LD1W_D_IMM:
3883 case AArch64::LD1SW_D_IMM:
3884 case AArch64::ST1B_H_IMM:
3885 case AArch64::ST1H_S_IMM:
3886 case AArch64::ST1W_D_IMM:
3887 case AArch64::LDNF1B_H_IMM:
3888 case AArch64::LDNF1SB_H_IMM:
3889 case AArch64::LDNF1H_S_IMM:
3890 case AArch64::LDNF1SH_S_IMM:
3891 case AArch64::LDNF1W_D_IMM:
3892 case AArch64::LDNF1SW_D_IMM:
3893    // A half vector's worth of data
3894 // Width = mbytes * elements
3895 Scale = TypeSize::getScalable(8);
3896 Width = TypeSize::getScalable(8);
3897 MinOffset = -8;
3898 MaxOffset = 7;
3899 break;
3900 case AArch64::LD1B_S_IMM:
3901 case AArch64::LD1SB_S_IMM:
3902 case AArch64::LD1H_D_IMM:
3903 case AArch64::LD1SH_D_IMM:
3904 case AArch64::ST1B_S_IMM:
3905 case AArch64::ST1H_D_IMM:
3906 case AArch64::LDNF1B_S_IMM:
3907 case AArch64::LDNF1SB_S_IMM:
3908 case AArch64::LDNF1H_D_IMM:
3909 case AArch64::LDNF1SH_D_IMM:
3910    // A quarter vector's worth of data
3911 // Width = mbytes * elements
3912 Scale = TypeSize::getScalable(4);
3913 Width = TypeSize::getScalable(4);
3914 MinOffset = -8;
3915 MaxOffset = 7;
3916 break;
3917 case AArch64::LD1B_D_IMM:
3918 case AArch64::LD1SB_D_IMM:
3919 case AArch64::ST1B_D_IMM:
3920 case AArch64::LDNF1B_D_IMM:
3921 case AArch64::LDNF1SB_D_IMM:
3922    // An eighth vector's worth of data
3923 // Width = mbytes * elements
3924 Scale = TypeSize::getScalable(2);
3925 Width = TypeSize::getScalable(2);
3926 MinOffset = -8;
3927 MaxOffset = 7;
3928 break;
3929 case AArch64::ST2Gi:
3930 case AArch64::STZ2Gi:
3931 Scale = TypeSize::getFixed(16);
3932 Width = TypeSize::getFixed(32);
3933 MinOffset = -256;
3934 MaxOffset = 255;
3935 break;
3936 case AArch64::STGPi:
3937 Scale = TypeSize::getFixed(16);
3938 Width = TypeSize::getFixed(16);
3939 MinOffset = -64;
3940 MaxOffset = 63;
3941 break;
3942 case AArch64::LD1RB_IMM:
3943 case AArch64::LD1RB_H_IMM:
3944 case AArch64::LD1RB_S_IMM:
3945 case AArch64::LD1RB_D_IMM:
3946 case AArch64::LD1RSB_H_IMM:
3947 case AArch64::LD1RSB_S_IMM:
3948 case AArch64::LD1RSB_D_IMM:
3949 Scale = TypeSize::getFixed(1);
3950 Width = TypeSize::getFixed(1);
3951 MinOffset = 0;
3952 MaxOffset = 63;
3953 break;
3954 case AArch64::LD1RH_IMM:
3955 case AArch64::LD1RH_S_IMM:
3956 case AArch64::LD1RH_D_IMM:
3957 case AArch64::LD1RSH_S_IMM:
3958 case AArch64::LD1RSH_D_IMM:
3959 Scale = TypeSize::getFixed(2);
3960 Width = TypeSize::getFixed(2);
3961 MinOffset = 0;
3962 MaxOffset = 63;
3963 break;
3964 case AArch64::LD1RW_IMM:
3965 case AArch64::LD1RW_D_IMM:
3966 case AArch64::LD1RSW_IMM:
3967 Scale = TypeSize::getFixed(4);
3968 Width = TypeSize::getFixed(4);
3969 MinOffset = 0;
3970 MaxOffset = 63;
3971 break;
3972 case AArch64::LD1RD_IMM:
3973 Scale = TypeSize::getFixed(8);
3974 Width = TypeSize::getFixed(8);
3975 MinOffset = 0;
3976 MaxOffset = 63;
3977 break;
3978 }
3979
3980 return true;
3981}
3982
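// Illustrative sketch (not part of the original file; hypothetical helper
// name): the Scale / MinOffset / MaxOffset triple filled in above bounds the
// immediates the scaled addressing forms can encode. A byte offset is
// representable only if it is Scale-aligned and its scaled value lies in
// [MinOffset, MaxOffset].
static bool exampleIsEncodableScaledImm(int64_t ByteOffset, int64_t Scale,
                                        int64_t MinOffset, int64_t MaxOffset) {
  if (ByteOffset % Scale != 0)
    return false; // would need an unscaled (LDUR/STUR) form instead
  int64_t Scaled = ByteOffset / Scale;
  return Scaled >= MinOffset && Scaled <= MaxOffset;
}
// E.g. for LDRXui (Scale = 8, range [0, 4095]) a byte offset of 32760 scales
// to 4095 and is encodable, while 32768 scales to 4096 and is not.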
3983// Scaling factor for unscaled load or store.
3985 switch (Opc) {
3986 default:
3987 llvm_unreachable("Opcode has unknown scale!");
3988 case AArch64::LDRBBui:
3989 case AArch64::LDURBBi:
3990 case AArch64::LDRSBWui:
3991 case AArch64::LDURSBWi:
3992 case AArch64::STRBBui:
3993 case AArch64::STURBBi:
3994 return 1;
3995 case AArch64::LDRHHui:
3996 case AArch64::LDURHHi:
3997 case AArch64::LDRSHWui:
3998 case AArch64::LDURSHWi:
3999 case AArch64::STRHHui:
4000 case AArch64::STURHHi:
4001 return 2;
4002 case AArch64::LDRSui:
4003 case AArch64::LDURSi:
4004 case AArch64::LDRSpre:
4005 case AArch64::LDRSWui:
4006 case AArch64::LDURSWi:
4007 case AArch64::LDRSWpre:
4008 case AArch64::LDRWpre:
4009 case AArch64::LDRWui:
4010 case AArch64::LDURWi:
4011 case AArch64::STRSui:
4012 case AArch64::STURSi:
4013 case AArch64::STRSpre:
4014 case AArch64::STRWui:
4015 case AArch64::STURWi:
4016 case AArch64::STRWpre:
4017 case AArch64::LDPSi:
4018 case AArch64::LDPSWi:
4019 case AArch64::LDPWi:
4020 case AArch64::STPSi:
4021 case AArch64::STPWi:
4022 return 4;
4023 case AArch64::LDRDui:
4024 case AArch64::LDURDi:
4025 case AArch64::LDRDpre:
4026 case AArch64::LDRXui:
4027 case AArch64::LDURXi:
4028 case AArch64::LDRXpre:
4029 case AArch64::STRDui:
4030 case AArch64::STURDi:
4031 case AArch64::STRDpre:
4032 case AArch64::STRXui:
4033 case AArch64::STURXi:
4034 case AArch64::STRXpre:
4035 case AArch64::LDPDi:
4036 case AArch64::LDPXi:
4037 case AArch64::STPDi:
4038 case AArch64::STPXi:
4039 return 8;
4040 case AArch64::LDRQui:
4041 case AArch64::LDURQi:
4042 case AArch64::STRQui:
4043 case AArch64::STURQi:
4044 case AArch64::STRQpre:
4045 case AArch64::LDPQi:
4046 case AArch64::LDRQpre:
4047 case AArch64::STPQi:
4048 case AArch64::STGi:
4049 case AArch64::STZGi:
4050 case AArch64::ST2Gi:
4051 case AArch64::STZ2Gi:
4052 case AArch64::STGPi:
4053 return 16;
4054 }
4055}
4056
4058 switch (MI.getOpcode()) {
4059 default:
4060 return false;
4061 case AArch64::LDRWpre:
4062 case AArch64::LDRXpre:
4063 case AArch64::LDRSWpre:
4064 case AArch64::LDRSpre:
4065 case AArch64::LDRDpre:
4066 case AArch64::LDRQpre:
4067 return true;
4068 }
4069}
4070
4072 switch (MI.getOpcode()) {
4073 default:
4074 return false;
4075 case AArch64::STRWpre:
4076 case AArch64::STRXpre:
4077 case AArch64::STRSpre:
4078 case AArch64::STRDpre:
4079 case AArch64::STRQpre:
4080 return true;
4081 }
4082}
4083
4085 return isPreLd(MI) || isPreSt(MI);
4086}
4087
4089 switch (MI.getOpcode()) {
4090 default:
4091 return false;
4092 case AArch64::LDPSi:
4093 case AArch64::LDPSWi:
4094 case AArch64::LDPDi:
4095 case AArch64::LDPQi:
4096 case AArch64::LDPWi:
4097 case AArch64::LDPXi:
4098 case AArch64::STPSi:
4099 case AArch64::STPDi:
4100 case AArch64::STPQi:
4101 case AArch64::STPWi:
4102 case AArch64::STPXi:
4103 case AArch64::STGPi:
4104 return true;
4105 }
4106}
4107
4109 unsigned Idx =
4111 : 1;
4112 return MI.getOperand(Idx);
4113}
4114
4115const MachineOperand &
4117 unsigned Idx =
4119 : 2;
4120 return MI.getOperand(Idx);
4121}
4122
4124 Register Reg) {
4125 if (MI.getParent() == nullptr)
4126 return nullptr;
4127 const MachineFunction *MF = MI.getParent()->getParent();
4128 return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
4129}
4130
4132 auto IsHFPR = [&](const MachineOperand &Op) {
4133 if (!Op.isReg())
4134 return false;
4135 auto Reg = Op.getReg();
4136 if (Reg.isPhysical())
4137 return AArch64::FPR16RegClass.contains(Reg);
4138 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4139 return TRC == &AArch64::FPR16RegClass ||
4140 TRC == &AArch64::FPR16_loRegClass;
4141 };
4142 return llvm::any_of(MI.operands(), IsHFPR);
4143}
4144
4146 auto IsQFPR = [&](const MachineOperand &Op) {
4147 if (!Op.isReg())
4148 return false;
4149 auto Reg = Op.getReg();
4150 if (Reg.isPhysical())
4151 return AArch64::FPR128RegClass.contains(Reg);
4152 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4153 return TRC == &AArch64::FPR128RegClass ||
4154 TRC == &AArch64::FPR128_loRegClass;
4155 };
4156 return llvm::any_of(MI.operands(), IsQFPR);
4157}
4158
4160 switch (MI.getOpcode()) {
4161 case AArch64::BRK:
4162 case AArch64::HLT:
4163 case AArch64::PACIASP:
4164 case AArch64::PACIBSP:
4165 // Implicit BTI behavior.
4166 return true;
4167 case AArch64::PAUTH_PROLOGUE:
4168 // PAUTH_PROLOGUE expands to PACI(A|B)SP.
4169 return true;
4170 case AArch64::HINT: {
4171 unsigned Imm = MI.getOperand(0).getImm();
4172 // Explicit BTI instruction.
4173 if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
4174 return true;
4175 // PACI(A|B)SP instructions.
4176 if (Imm == 25 || Imm == 27)
4177 return true;
4178 return false;
4179 }
4180 default:
4181 return false;
4182 }
4183}
4184
4186 auto IsFPR = [&](const MachineOperand &Op) {
4187 if (!Op.isReg())
4188 return false;
4189 auto Reg = Op.getReg();
4190 if (Reg.isPhysical())
4191 return AArch64::FPR128RegClass.contains(Reg) ||
4192 AArch64::FPR64RegClass.contains(Reg) ||
4193 AArch64::FPR32RegClass.contains(Reg) ||
4194 AArch64::FPR16RegClass.contains(Reg) ||
4195 AArch64::FPR8RegClass.contains(Reg);
4196
4197 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4198 return TRC == &AArch64::FPR128RegClass ||
4199 TRC == &AArch64::FPR128_loRegClass ||
4200 TRC == &AArch64::FPR64RegClass ||
4201 TRC == &AArch64::FPR64_loRegClass ||
4202 TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
4203 TRC == &AArch64::FPR8RegClass;
4204 };
4205 return llvm::any_of(MI.operands(), IsFPR);
4206}
4207
4208// Scale the unscaled offsets. Returns false if the unscaled offset can't be
4209// scaled.
4210static bool scaleOffset(unsigned Opc, int64_t &Offset) {
4211 int Scale = AArch64InstrInfo::getMemScale(Opc);
4212
4213 // If the byte-offset isn't a multiple of the stride, we can't scale this
4214 // offset.
4215 if (Offset % Scale != 0)
4216 return false;
4217
4218  // Convert the byte offset used by the unscaled instruction into the
4219  // "element" offset used by the scaled pair load/store instructions.
4220 Offset /= Scale;
4221 return true;
4222}
4223
4224static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
4225 if (FirstOpc == SecondOpc)
4226 return true;
4227 // We can also pair sign-ext and zero-ext instructions.
4228 switch (FirstOpc) {
4229 default:
4230 return false;
4231 case AArch64::STRSui:
4232 case AArch64::STURSi:
4233 return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
4234 case AArch64::STRDui:
4235 case AArch64::STURDi:
4236 return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
4237 case AArch64::STRQui:
4238 case AArch64::STURQi:
4239 return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
4240 case AArch64::STRWui:
4241 case AArch64::STURWi:
4242 return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
4243 case AArch64::STRXui:
4244 case AArch64::STURXi:
4245 return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
4246 case AArch64::LDRSui:
4247 case AArch64::LDURSi:
4248 return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
4249 case AArch64::LDRDui:
4250 case AArch64::LDURDi:
4251 return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
4252 case AArch64::LDRQui:
4253 case AArch64::LDURQi:
4254 return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
4255 case AArch64::LDRWui:
4256 case AArch64::LDURWi:
4257 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
4258 case AArch64::LDRSWui:
4259 case AArch64::LDURSWi:
4260 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
4261 case AArch64::LDRXui:
4262 case AArch64::LDURXi:
4263 return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
4264 }
4265 // These instructions can't be paired based on their opcodes.
4266 return false;
4267}
4268
4269static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
4270 int64_t Offset1, unsigned Opcode1, int FI2,
4271 int64_t Offset2, unsigned Opcode2) {
4272  // Accesses through fixed stack object frame indices may access a different
4273  // fixed stack slot. Check that the object offsets plus the immediates are adjacent.
4274 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
4275 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
4276 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
4277 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
4278 // Convert to scaled object offsets.
4279 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
4280 if (ObjectOffset1 % Scale1 != 0)
4281 return false;
4282 ObjectOffset1 /= Scale1;
4283 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
4284 if (ObjectOffset2 % Scale2 != 0)
4285 return false;
4286 ObjectOffset2 /= Scale2;
4287 ObjectOffset1 += Offset1;
4288 ObjectOffset2 += Offset2;
4289 return ObjectOffset1 + 1 == ObjectOffset2;
4290 }
4291
4292 return FI1 == FI2;
4293}
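// Worked sketch of the fixed-object path above, with hypothetical values:
// two fixed stack objects at byte offsets -16 and -8, both accessed by
// LDRXui (scale 8) with zero instruction offsets. The scaled object offsets
// are -2 and -1, which are adjacent, so the two accesses may be clustered.
static bool exampleClusterFixedObjects() {
  int64_t ObjectOffset1 = -16, ObjectOffset2 = -8; // object offsets in bytes
  int Scale1 = 8, Scale2 = 8;                      // access size of LDRXui
  int64_t Offset1 = 0, Offset2 = 0;                // scaled instruction imms
  if (ObjectOffset1 % Scale1 != 0 || ObjectOffset2 % Scale2 != 0)
    return false;
  ObjectOffset1 = ObjectOffset1 / Scale1 + Offset1; // -2
  ObjectOffset2 = ObjectOffset2 / Scale2 + Offset2; // -1
  return ObjectOffset1 + 1 == ObjectOffset2;        // true -> clusterable
}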
4294
4295/// Detect opportunities for ldp/stp formation.
4296///
4297/// Only called for LdSt for which getMemOperandWithOffset returns true.
4299 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
4300 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
4301 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
4302 unsigned NumBytes) const {
4303 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
4304 const MachineOperand &BaseOp1 = *BaseOps1.front();
4305 const MachineOperand &BaseOp2 = *BaseOps2.front();
4306 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
4307 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
4308 if (BaseOp1.getType() != BaseOp2.getType())
4309 return false;
4310
4311 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
4312 "Only base registers and frame indices are supported.");
4313
4314 // Check for both base regs and base FI.
4315 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
4316 return false;
4317
4318 // Only cluster up to a single pair.
4319 if (ClusterSize > 2)
4320 return false;
4321
4322 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
4323 return false;
4324
4325 // Can we pair these instructions based on their opcodes?
4326 unsigned FirstOpc = FirstLdSt.getOpcode();
4327 unsigned SecondOpc = SecondLdSt.getOpcode();
4328 if (!canPairLdStOpc(FirstOpc, SecondOpc))
4329 return false;
4330
4331 // Can't merge volatiles or load/stores that have a hint to avoid pair
4332 // formation, for example.
4333 if (!isCandidateToMergeOrPair(FirstLdSt) ||
4334 !isCandidateToMergeOrPair(SecondLdSt))
4335 return false;
4336
4337 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
4338 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
4339 if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
4340 return false;
4341
4342 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
4343 if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
4344 return false;
4345
4346 // Pairwise instructions have a 7-bit signed offset field.
4347 if (Offset1 > 63 || Offset1 < -64)
4348 return false;
4349
4350  // The caller should already have ordered First/SecondLdSt by offset.
4351  // Note: this is not guaranteed when the bases are different frame indices.
4352 if (BaseOp1.isFI()) {
4353 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
4354 "Caller should have ordered offsets.");
4355
4356 const MachineFrameInfo &MFI =
4357 FirstLdSt.getParent()->getParent()->getFrameInfo();
4358 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
4359 BaseOp2.getIndex(), Offset2, SecondOpc);
4360 }
4361
4362 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
4363
4364 return Offset1 + 1 == Offset2;
4365}
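// Worked example for the register-base path above (hypothetical operands):
// two STRXui stores off the same base register with immediates 1 and 2,
// i.e. byte offsets 8 and 16. STRXui is already scaled, so no conversion is
// needed; both values fit the 7-bit signed pair range [-64, 63] and
// 1 + 1 == 2, so the pair is reported as a clustering candidate for STP
// formation. An immediate of 64 would be rejected by the range check.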
4366
4368 unsigned Reg, unsigned SubIdx,
4369 unsigned State,
4370 const TargetRegisterInfo *TRI) {
4371 if (!SubIdx)
4372 return MIB.addReg(Reg, State);
4373
4375 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
4376 return MIB.addReg(Reg, State, SubIdx);
4377}
4378
4379static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
4380 unsigned NumRegs) {
4381  // We really want the positive remainder mod 32 here, which happens to be
4382  // easily obtainable with a mask.
4383 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
4384}
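// Illustrative check (standalone sketch, hypothetical name): the tuple copies
// below iterate forwards unless the destination encoding falls inside the
// source tuple. For a two-register tuple copied from encodings {31, 0} to
// {0, 1}, (0 - 31) & 0x1f == 1 < 2, so a forward copy would clobber the
// second source register and the loop runs in reverse instead.
static bool exampleForwardCopyClobbers(unsigned DestEnc, unsigned SrcEnc,
                                       unsigned NumRegs) {
  return ((DestEnc - SrcEnc) & 0x1f) < NumRegs;
}
// exampleForwardCopyClobbers(0, 31, 2) -> true  (copy the tuple backwards)
// exampleForwardCopyClobbers(4, 0, 2)  -> false (forward order is safe)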
4385
4388 const DebugLoc &DL, MCRegister DestReg,
4389 MCRegister SrcReg, bool KillSrc,
4390 unsigned Opcode,
4391 ArrayRef<unsigned> Indices) const {
4392 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
4394 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
4395 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
4396 unsigned NumRegs = Indices.size();
4397
4398 int SubReg = 0, End = NumRegs, Incr = 1;
4399 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
4400 SubReg = NumRegs - 1;
4401 End = -1;
4402 Incr = -1;
4403 }
4404
4405 for (; SubReg != End; SubReg += Incr) {
4406 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
4407 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
4408 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
4409 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
4410 }
4411}
4412
4415 DebugLoc DL, unsigned DestReg,
4416 unsigned SrcReg, bool KillSrc,
4417 unsigned Opcode, unsigned ZeroReg,
4418 llvm::ArrayRef<unsigned> Indices) const {
4420 unsigned NumRegs = Indices.size();
4421
4422#ifndef NDEBUG
4423 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
4424 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
4425 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
4426 "GPR reg sequences should not be able to overlap");
4427#endif
4428
4429 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
4430 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
4431 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
4432 MIB.addReg(ZeroReg);
4433 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
4434 MIB.addImm(0);
4435 }
4436}
4437
4440 const DebugLoc &DL, MCRegister DestReg,
4441 MCRegister SrcReg, bool KillSrc) const {
4442 if (AArch64::GPR32spRegClass.contains(DestReg) &&
4443 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
4445
4446 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
4447 // If either operand is WSP, expand to ADD #0.
4448 if (Subtarget.hasZeroCycleRegMove()) {
4449 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
4450 MCRegister DestRegX = TRI->getMatchingSuperReg(
4451 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4452 MCRegister SrcRegX = TRI->getMatchingSuperReg(
4453 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4454 // This instruction is reading and writing X registers. This may upset
4455 // the register scavenger and machine verifier, so we need to indicate
4456 // that we are reading an undefined value from SrcRegX, but a proper
4457 // value from SrcReg.
4458 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
4459 .addReg(SrcRegX, RegState::Undef)
4460 .addImm(0)
4462 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
4463 } else {
4464 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
4465 .addReg(SrcReg, getKillRegState(KillSrc))
4466 .addImm(0)
4468 }
4469 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
4470 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
4471 .addImm(0)
4473 } else {
4474 if (Subtarget.hasZeroCycleRegMove()) {
4475 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
4476 MCRegister DestRegX = TRI->getMatchingSuperReg(
4477 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4478 MCRegister SrcRegX = TRI->getMatchingSuperReg(
4479 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
4480 // This instruction is reading and writing X registers. This may upset
4481 // the register scavenger and machine verifier, so we need to indicate
4482 // that we are reading an undefined value from SrcRegX, but a proper
4483 // value from SrcReg.
4484 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
4485 .addReg(AArch64::XZR)
4486 .addReg(SrcRegX, RegState::Undef)
4487 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
4488 } else {
4489 // Otherwise, expand to ORR WZR.
4490 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
4491 .addReg(AArch64::WZR)
4492 .addReg(SrcReg, getKillRegState(KillSrc));
4493 }
4494 }
4495 return;
4496 }
4497
4498 // Copy a Predicate register by ORRing with itself.
4499 if (AArch64::PPRRegClass.contains(DestReg) &&
4500 AArch64::PPRRegClass.contains(SrcReg)) {
4502 "Unexpected SVE register.");
4503 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
4504 .addReg(SrcReg) // Pg
4505 .addReg(SrcReg)
4506 .addReg(SrcReg, getKillRegState(KillSrc));
4507 return;
4508 }
4509
4510 // Copy a predicate-as-counter register by ORRing with itself as if it
4511 // were a regular predicate (mask) register.
4512 bool DestIsPNR = AArch64::PNRRegClass.contains(DestReg);
4513 bool SrcIsPNR = AArch64::PNRRegClass.contains(SrcReg);
4514 if (DestIsPNR || SrcIsPNR) {
4515 auto ToPPR = [](MCRegister R) -> MCRegister {
4516 return (R - AArch64::PN0) + AArch64::P0;
4517 };
4518 MCRegister PPRSrcReg = SrcIsPNR ? ToPPR(SrcReg) : SrcReg;
4519 MCRegister PPRDestReg = DestIsPNR ? ToPPR(DestReg) : DestReg;
4520
4521 if (PPRSrcReg != PPRDestReg) {
4522 auto NewMI = BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg)
4523 .addReg(PPRSrcReg) // Pg
4524 .addReg(PPRSrcReg)
4525 .addReg(PPRSrcReg, getKillRegState(KillSrc));
4526 if (DestIsPNR)
4527 NewMI.addDef(DestReg, RegState::Implicit);
4528 }
4529 return;
4530 }
4531
4532 // Copy a Z register by ORRing with itself.
4533 if (AArch64::ZPRRegClass.contains(DestReg) &&
4534 AArch64::ZPRRegClass.contains(SrcReg)) {
4536 "Unexpected SVE register.");
4537 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
4538 .addReg(SrcReg)
4539 .addReg(SrcReg, getKillRegState(KillSrc));
4540 return;
4541 }
4542
4543 // Copy a Z register pair by copying the individual sub-registers.
4544 if ((AArch64::ZPR2RegClass.contains(DestReg) ||
4545 AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
4546 (AArch64::ZPR2RegClass.contains(SrcReg) ||
4547 AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
4549 "Unexpected SVE register.");
4550 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
4551 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4552 Indices);
4553 return;
4554 }
4555
4556 // Copy a Z register triple by copying the individual sub-registers.
4557 if (AArch64::ZPR3RegClass.contains(DestReg) &&
4558 AArch64::ZPR3RegClass.contains(SrcReg)) {
4560 "Unexpected SVE register.");
4561 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
4562 AArch64::zsub2};
4563 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4564 Indices);
4565 return;
4566 }
4567
4568 // Copy a Z register quad by copying the individual sub-registers.
4569 if ((AArch64::ZPR4RegClass.contains(DestReg) ||
4570 AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
4571 (AArch64::ZPR4RegClass.contains(SrcReg) ||
4572 AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
4574 "Unexpected SVE register.");
4575 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
4576 AArch64::zsub2, AArch64::zsub3};
4577 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
4578 Indices);
4579 return;
4580 }
4581
4582 if (AArch64::GPR64spRegClass.contains(DestReg) &&
4583 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
4584 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
4585 // If either operand is SP, expand to ADD #0.
4586 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
4587 .addReg(SrcReg, getKillRegState(KillSrc))
4588 .addImm(0)
4590 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
4591 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
4592 .addImm(0)
4594 } else {
4595 // Otherwise, expand to ORR XZR.
4596 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
4597 .addReg(AArch64::XZR)
4598 .addReg(SrcReg, getKillRegState(KillSrc));
4599 }
4600 return;
4601 }
4602
4603 // Copy a DDDD register quad by copying the individual sub-registers.
4604 if (AArch64::DDDDRegClass.contains(DestReg) &&
4605 AArch64::DDDDRegClass.contains(SrcReg)) {
4606 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
4607 AArch64::dsub2, AArch64::dsub3};
4608 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
4609 Indices);
4610 return;
4611 }
4612
4613 // Copy a DDD register triple by copying the individual sub-registers.
4614 if (AArch64::DDDRegClass.contains(DestReg) &&
4615 AArch64::DDDRegClass.contains(SrcReg)) {
4616 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
4617 AArch64::dsub2};
4618 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
4619 Indices);
4620 return;
4621 }
4622
4623 // Copy a DD register pair by copying the individual sub-registers.
4624 if (AArch64::DDRegClass.contains(DestReg) &&
4625 AArch64::DDRegClass.contains(SrcReg)) {
4626 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
4627 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
4628 Indices);
4629 return;
4630 }
4631
4632 // Copy a QQQQ register quad by copying the individual sub-registers.
4633 if (AArch64::QQQQRegClass.contains(DestReg) &&
4634 AArch64::QQQQRegClass.contains(SrcReg)) {
4635 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
4636 AArch64::qsub2, AArch64::qsub3};
4637 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
4638 Indices);
4639 return;
4640 }
4641
4642 // Copy a QQQ register triple by copying the individual sub-registers.
4643 if (AArch64::QQQRegClass.contains(DestReg) &&
4644 AArch64::QQQRegClass.contains(SrcReg)) {
4645 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
4646 AArch64::qsub2};
4647 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
4648 Indices);
4649 return;
4650 }
4651
4652 // Copy a QQ register pair by copying the individual sub-registers.
4653 if (AArch64::QQRegClass.contains(DestReg) &&
4654 AArch64::QQRegClass.contains(SrcReg)) {
4655 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
4656 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
4657 Indices);
4658 return;
4659 }
4660
4661 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
4662 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
4663 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
4664 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
4665 AArch64::XZR, Indices);
4666 return;
4667 }
4668
4669 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
4670 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
4671 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
4672 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
4673 AArch64::WZR, Indices);
4674 return;
4675 }
4676
4677 if (AArch64::FPR128RegClass.contains(DestReg) &&
4678 AArch64::FPR128RegClass.contains(SrcReg)) {
4679 if (Subtarget.isSVEorStreamingSVEAvailable() &&
4680 !Subtarget.isNeonAvailable())
4681 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
4682 .addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
4683 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
4684 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
4685 else if (Subtarget.isNeonAvailable())
4686 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
4687 .addReg(SrcReg)
4688 .addReg(SrcReg, getKillRegState(KillSrc));
4689 else {
4690 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
4691 .addReg(AArch64::SP, RegState::Define)
4692 .addReg(SrcReg, getKillRegState(KillSrc))
4693 .addReg(AArch64::SP)
4694 .addImm(-16);
4695 BuildMI(MBB, I, DL, get(AArch64::LDRQpost))
4696 .addReg(AArch64::SP, RegState::Define)
4697 .addReg(DestReg, RegState::Define)
4698 .addReg(AArch64::SP)
4699 .addImm(16);
4700 }
4701 return;
4702 }
4703
4704 if (AArch64::FPR64RegClass.contains(DestReg) &&
4705 AArch64::FPR64RegClass.contains(SrcReg)) {
4706 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
4707 .addReg(SrcReg, getKillRegState(KillSrc));
4708 return;
4709 }
4710
4711 if (AArch64::FPR32RegClass.contains(DestReg) &&
4712 AArch64::FPR32RegClass.contains(SrcReg)) {
4713 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
4714 .addReg(SrcReg, getKillRegState(KillSrc));
4715 return;
4716 }
4717
4718 if (AArch64::FPR16RegClass.contains(DestReg) &&
4719 AArch64::FPR16RegClass.contains(SrcReg)) {
4720 DestReg =
4721 RI.getMatchingSuperReg(DestReg, AArch64::hsub, &AArch64::FPR32RegClass);
4722 SrcReg =
4723 RI.getMatchingSuperReg(SrcReg, AArch64::hsub, &AArch64::FPR32RegClass);
4724 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
4725 .addReg(SrcReg, getKillRegState(KillSrc));
4726 return;
4727 }
4728
4729 if (AArch64::FPR8RegClass.contains(DestReg) &&
4730 AArch64::FPR8RegClass.contains(SrcReg)) {
4731 DestReg =
4732 RI.getMatchingSuperReg(DestReg, AArch64::bsub, &AArch64::FPR32RegClass);
4733 SrcReg =
4734 RI.getMatchingSuperReg(SrcReg, AArch64::bsub, &AArch64::FPR32RegClass);
4735 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
4736 .addReg(SrcReg, getKillRegState(KillSrc));
4737 return;
4738 }
4739
4740 // Copies between GPR64 and FPR64.
4741 if (AArch64::FPR64RegClass.contains(DestReg) &&
4742 AArch64::GPR64RegClass.contains(SrcReg)) {
4743 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
4744 .addReg(SrcReg, getKillRegState(KillSrc));
4745 return;
4746 }
4747 if (AArch64::GPR64RegClass.contains(DestReg) &&
4748 AArch64::FPR64RegClass.contains(SrcReg)) {
4749 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
4750 .addReg(SrcReg, getKillRegState(KillSrc));
4751 return;
4752 }
4753 // Copies between GPR32 and FPR32.
4754 if (AArch64::FPR32RegClass.contains(DestReg) &&
4755 AArch64::GPR32RegClass.contains(SrcReg)) {
4756 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
4757 .addReg(SrcReg, getKillRegState(KillSrc));
4758 return;
4759 }
4760 if (AArch64::GPR32RegClass.contains(DestReg) &&
4761 AArch64::FPR32RegClass.contains(SrcReg)) {
4762 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
4763 .addReg(SrcReg, getKillRegState(KillSrc));
4764 return;
4765 }
4766
4767 if (DestReg == AArch64::NZCV) {
4768 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
4769 BuildMI(MBB, I, DL, get(AArch64::MSR))
4770 .addImm(AArch64SysReg::NZCV)
4771 .addReg(SrcReg, getKillRegState(KillSrc))
4772 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
4773 return;
4774 }
4775
4776 if (SrcReg == AArch64::NZCV) {
4777 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
4778 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
4779 .addImm(AArch64SysReg::NZCV)
4780 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
4781 return;
4782 }
4783
4784#ifndef NDEBUG
4786 errs() << TRI.getRegAsmName(DestReg) << " = COPY "
4787 << TRI.getRegAsmName(SrcReg) << "\n";
4788#endif
4789 llvm_unreachable("unimplemented reg-to-reg copy");
4790}
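// For illustration, the expansions selected above include (assembly is a
// hand-written sketch, not generated from this file):
//   W1   = COPY W2  (no WSP involved)         -> orr  w1, wzr, w2
//   WSP  = COPY W2                            -> add  wsp, w2, #0
//   Q0   = COPY Q1  (NEON available)          -> orr  v0.16b, v1.16b, v1.16b
//   Q0   = COPY Q1  (streaming SVE, no NEON)  -> orr  z0.d, z1.d, z1.d
//   D3   = COPY X5                            -> fmov d3, x5
//   NZCV = COPY X0                            -> msr  NZCV, x0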
4791
4794 MachineBasicBlock::iterator InsertBefore,
4795 const MCInstrDesc &MCID,
4796 Register SrcReg, bool IsKill,
4797 unsigned SubIdx0, unsigned SubIdx1, int FI,
4798 MachineMemOperand *MMO) {
4799 Register SrcReg0 = SrcReg;
4800 Register SrcReg1 = SrcReg;
4801 if (SrcReg.isPhysical()) {
4802 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
4803 SubIdx0 = 0;
4804 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
4805 SubIdx1 = 0;
4806 }
4807 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
4808 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
4809 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
4810 .addFrameIndex(FI)
4811 .addImm(0)
4812 .addMemOperand(MMO);
4813}
4814
4817 Register SrcReg, bool isKill, int FI,
4818 const TargetRegisterClass *RC,
4819 const TargetRegisterInfo *TRI,
4820 Register VReg) const {
4821 MachineFunction &MF = *MBB.getParent();
4822 MachineFrameInfo &MFI = MF.getFrameInfo();
4823
4825 MachineMemOperand *MMO =
4827 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
4828 unsigned Opc = 0;
4829 bool Offset = true;
4831 unsigned StackID = TargetStackID::Default;
4832 switch (TRI->getSpillSize(*RC)) {
4833 case 1:
4834 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
4835 Opc = AArch64::STRBui;
4836 break;
4837 case 2: {
4838 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
4839 Opc = AArch64::STRHui;
4840 else if (AArch64::PNRRegClass.hasSubClassEq(RC) ||
4841 AArch64::PPRRegClass.hasSubClassEq(RC)) {
4843 "Unexpected register store without SVE store instructions");
4844 Opc = AArch64::STR_PXI;
4846 }
4847 break;
4848 }
4849 case 4:
4850 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
4851 Opc = AArch64::STRWui;
4852 if (SrcReg.isVirtual())
4853 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
4854 else
4855 assert(SrcReg != AArch64::WSP);
4856 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
4857 Opc = AArch64::STRSui;
4858 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
4859 Opc = AArch64::STR_PPXI;
4861 }
4862 break;
4863 case 8:
4864 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
4865 Opc = AArch64::STRXui;
4866 if (SrcReg.isVirtual())
4867 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
4868 else
4869 assert(SrcReg != AArch64::SP);
4870 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
4871 Opc = AArch64::STRDui;
4872 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
4874 get(AArch64::STPWi), SrcReg, isKill,
4875 AArch64::sube32, AArch64::subo32, FI, MMO);
4876 return;
4877 }
4878 break;
4879 case 16:
4880 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
4881 Opc = AArch64::STRQui;
4882 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
4883 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4884 Opc = AArch64::ST1Twov1d;
4885 Offset = false;
4886 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
4888 get(AArch64::STPXi), SrcReg, isKill,
4889 AArch64::sube64, AArch64::subo64, FI, MMO);
4890 return;
4891 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
4893 "Unexpected register store without SVE store instructions");
4894 Opc = AArch64::STR_ZXI;
4896 }
4897 break;
4898 case 24:
4899 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
4900 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4901 Opc = AArch64::ST1Threev1d;
4902 Offset = false;
4903 }
4904 break;
4905 case 32:
4906 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
4907 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4908 Opc = AArch64::ST1Fourv1d;
4909 Offset = false;
4910 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
4911 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4912 Opc = AArch64::ST1Twov2d;
4913 Offset = false;
4914 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
4915 AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
4917 "Unexpected register store without SVE store instructions");
4918 Opc = AArch64::STR_ZZXI;
4920 }
4921 break;
4922 case 48:
4923 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
4924 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4925 Opc = AArch64::ST1Threev2d;
4926 Offset = false;
4927 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
4929 "Unexpected register store without SVE store instructions");
4930 Opc = AArch64::STR_ZZZXI;
4932 }
4933 break;
4934 case 64:
4935 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
4936 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
4937 Opc = AArch64::ST1Fourv2d;
4938 Offset = false;
4939 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
4940 AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
4942 "Unexpected register store without SVE store instructions");
4943 Opc = AArch64::STR_ZZZZXI;
4945 }
4946 break;
4947 }
4948 assert(Opc && "Unknown register class");
4949 MFI.setStackID(FI, StackID);
4950
4951 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
4952 .addReg(SrcReg, getKillRegState(isKill))
4953 .addFrameIndex(FI);
4954
4955 if (Offset)
4956 MI.addImm(0);
4957 if (PNRReg.isValid())
4958 MI.addDef(PNRReg, RegState::Implicit);
4959 MI.addMemOperand(MMO);
4960}
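// Summary of the spill opcodes chosen above for common register classes
// (illustrative; the immediate is the scaled offset 0 unless noted):
//   GPR32 / GPR64                 -> STRWui / STRXui
//   FPR8/16/32/64/128             -> STRBui / STRHui / STRSui / STRDui / STRQui
//   W / X sequential pairs        -> STPWi / STPXi on the two sub-registers
//   SVE Z and predicate registers -> STR_ZXI / STR_PXI
//   NEON D / Q tuples             -> ST1Twov1d, ST1Twov2d, ... with no offset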
4961
4964 MachineBasicBlock::iterator InsertBefore,
4965 const MCInstrDesc &MCID,
4966 Register DestReg, unsigned SubIdx0,
4967 unsigned SubIdx1, int FI,
4968 MachineMemOperand *MMO) {
4969 Register DestReg0 = DestReg;
4970 Register DestReg1 = DestReg;
4971 bool IsUndef = true;
4972 if (DestReg.isPhysical()) {
4973 DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
4974 SubIdx0 = 0;
4975 DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
4976 SubIdx1 = 0;
4977 IsUndef = false;
4978 }
4979 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
4980 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
4981 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
4982 .addFrameIndex(FI)
4983 .addImm(0)
4984 .addMemOperand(MMO);
4985}
4986
4989 Register DestReg, int FI,
4990 const TargetRegisterClass *RC,
4991 const TargetRegisterInfo *TRI,
4992 Register VReg) const {
4993 MachineFunction &MF = *MBB.getParent();
4994 MachineFrameInfo &MFI = MF.getFrameInfo();
4996 MachineMemOperand *MMO =
4998 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
4999
5000 unsigned Opc = 0;
5001 bool Offset = true;
5002 unsigned StackID = TargetStackID::Default;
5004 switch (TRI->getSpillSize(*RC)) {
5005 case 1:
5006 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
5007 Opc = AArch64::LDRBui;
5008 break;
5009 case 2: {
5010 bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
5011 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
5012 Opc = AArch64::LDRHui;
5013 else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
5015 "Unexpected register load without SVE load instructions");
5016 if (IsPNR)
5017 PNRReg = DestReg;
5018 Opc = AArch64::LDR_PXI;
5020 }
5021 break;
5022 }
5023 case 4:
5024 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5025 Opc = AArch64::LDRWui;
5026 if (DestReg.isVirtual())
5027 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
5028 else
5029 assert(DestReg != AArch64::WSP);
5030 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5031 Opc = AArch64::LDRSui;
5032 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5033 Opc = AArch64::LDR_PPXI;
5035 }
5036 break;
5037 case 8:
5038 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5039 Opc = AArch64::LDRXui;
5040 if (DestReg.isVirtual())
5041 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
5042 else
5043 assert(DestReg != AArch64::SP);
5044 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5045 Opc = AArch64::LDRDui;
5046 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5048 get(AArch64::LDPWi), DestReg, AArch64::sube32,
5049 AArch64::subo32, FI, MMO);
5050 return;
5051 }
5052 break;
5053 case 16:
5054 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5055 Opc = AArch64::LDRQui;
5056 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5057 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5058 Opc = AArch64::LD1Twov1d;
5059 Offset = false;
5060 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5062 get(AArch64::LDPXi), DestReg, AArch64::sube64,
5063 AArch64::subo64, FI, MMO);
5064 return;
5065 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5067 "Unexpected register load without SVE load instructions");
5068 Opc = AArch64::LDR_ZXI;
5070 }
5071 break;
5072 case 24:
5073 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5074 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5075 Opc = AArch64::LD1Threev1d;
5076 Offset = false;
5077 }
5078 break;
5079 case 32:
5080 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5081 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5082 Opc = AArch64::LD1Fourv1d;
5083 Offset = false;
5084 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5085 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5086 Opc = AArch64::LD1Twov2d;
5087 Offset = false;
5088 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC) ||
5089 AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5091 "Unexpected register load without SVE load instructions");
5092 Opc = AArch64::LDR_ZZXI;
5094 }
5095 break;
5096 case 48:
5097 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5098 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5099 Opc = AArch64::LD1Threev2d;
5100 Offset = false;
5101 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5103 "Unexpected register load without SVE load instructions");
5104 Opc = AArch64::LDR_ZZZXI;
5106 }
5107 break;
5108 case 64:
5109 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5110 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5111 Opc = AArch64::LD1Fourv2d;
5112 Offset = false;
5113 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC) ||
5114 AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5116 "Unexpected register load without SVE load instructions");
5117 Opc = AArch64::LDR_ZZZZXI;
5119 }
5120 break;
5121 }
5122
5123 assert(Opc && "Unknown register class");
5124 MFI.setStackID(FI, StackID);
5125
5126 const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
5127 .addReg(DestReg, getDefRegState(true))
5128 .addFrameIndex(FI);
5129 if (Offset)
5130 MI.addImm(0);
5131 if (PNRReg.isValid() && !PNRReg.isVirtual())
5132 MI.addDef(PNRReg, RegState::Implicit);
5133 MI.addMemOperand(MMO);
5134
5135 if (PNRReg.isValid() && PNRReg.isVirtual())
5136 BuildMI(MBB, MBBI, DebugLoc(), get(TargetOpcode::COPY), PNRReg)
5137 .addReg(DestReg);
5138}
5139
5141 const MachineInstr &UseMI,
5142 const TargetRegisterInfo *TRI) {
5143 return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
5144 UseMI.getIterator()),
5145 [TRI](const MachineInstr &I) {
5146 return I.modifiesRegister(AArch64::NZCV, TRI) ||
5147 I.readsRegister(AArch64::NZCV, TRI);
5148 });
5149}
5150
5152 const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
5153  // The smallest scalable elements supported by scaled SVE addressing modes
5154  // are predicates, which are 2 scalable bytes in size, so the scalable byte
5155  // offset must always be a multiple of 2.
5156 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5157
5158  // VGSized offsets are divided by 2 because the VG register holds the
5159  // number of 64-bit granules, as opposed to the 128-bit vector chunks by
5160  // which the 'n' in e.g. MVT::nxv1i8 is modelled.
5161  // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes,
5162  // VG = n * 2, and the DWARF offset must be VG * 8 bytes.
5163 ByteSized = Offset.getFixed();
5164 VGSized = Offset.getScalable() / 2;
5165}
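// Worked example (hypothetical values): a StackOffset of 16 fixed bytes plus
// 32 scalable bytes decomposes into ByteSized = 16 and VGSized = 32 / 2 = 16,
// so the resulting DWARF expression describes 16 + 16 * VG bytes.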
5166
5167/// Decomposes a frame offset into the parts from which it can be
5168/// materialized: a plain byte offset plus counts of predicate-sized and
5169/// data-sized vectors. For non-scalable offsets only the byte part is non-zero.
5171 const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
5172 int64_t &NumDataVectors) {
5173  // The smallest scalable elements supported by scaled SVE addressing modes
5174  // are predicates, which are 2 scalable bytes in size, so the scalable byte
5175  // offset must always be a multiple of 2.
5176 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5177
5178 NumBytes = Offset.getFixed();
5179 NumDataVectors = 0;
5180 NumPredicateVectors = Offset.getScalable() / 2;
5181  // This method computes the offsets used to adjust the frame offset.
5182  // If the offset requires ADDPL and would need more than two ADDPL
5183  // instructions, part of it is folded into NumDataVectors so that ADDVL
5184  // covers that part, reducing the number of ADDPL instructions.
5185 if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
5186 NumPredicateVectors > 62) {
5187 NumDataVectors = NumPredicateVectors / 8;
5188 NumPredicateVectors -= NumDataVectors * 8;
5189 }
5190}
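// Standalone sketch of the ADDVL/ADDPL split above (hypothetical name):
static void exampleSplitScalable(int64_t ScalableBytes, int64_t &NumPL,
                                 int64_t &NumVL) {
  NumVL = 0;
  NumPL = ScalableBytes / 2;
  if (NumPL % 8 == 0 || NumPL < -64 || NumPL > 62) {
    NumVL = NumPL / 8;
    NumPL -= NumVL * 8;
  }
}
// E.g. 144 scalable bytes -> NumPL = 72 > 62, so NumVL = 9 and NumPL = 0
// (a single ADDVL); 6 scalable bytes -> NumPL = 3, handled by ADDPL alone.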
5191
5192// Convenience function to create a DWARF expression for
5193// Expr + NumBytes + NumVGScaledBytes * AArch64::VG
5194static void appendVGScaledOffsetExpr(SmallVectorImpl<char> &Expr, int NumBytes,
5195 int NumVGScaledBytes, unsigned VG,
5196 llvm::raw_string_ostream &Comment) {
5197 uint8_t buffer[16];
5198
5199 if (NumBytes) {
5200 Expr.push_back(dwarf::DW_OP_consts);
5201 Expr.append(buffer, buffer + encodeSLEB128(NumBytes, buffer));
5202 Expr.push_back((uint8_t)dwarf::DW_OP_plus);
5203 Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
5204 }
5205
5206 if (NumVGScaledBytes) {
5207 Expr.push_back((uint8_t)dwarf::DW_OP_consts);
5208 Expr.append(buffer, buffer + encodeSLEB128(NumVGScaledBytes, buffer));
5209
5210 Expr.push_back((uint8_t)dwarf::DW_OP_bregx);
5211 Expr.append(buffer, buffer + encodeULEB128(VG, buffer));
5212 Expr.push_back(0);
5213
5214 Expr.push_back((uint8_t)dwarf::DW_OP_mul);
5215 Expr.push_back((uint8_t)dwarf::DW_OP_plus);
5216
5217 Comment << (NumVGScaledBytes < 0 ? " - " : " + ")
5218 << std::abs(NumVGScaledBytes) << " * VG";
5219 }
5220}
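// For illustration, with NumBytes = -16 and NumVGScaledBytes = -8 the helper
// above appends (LEB128-encoded operands elided):
//   DW_OP_consts -16, DW_OP_plus,
//   DW_OP_consts -8, DW_OP_bregx VG 0, DW_OP_mul, DW_OP_plus
// and the comment stream reads " - 16 - 8 * VG".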
5221
5222// Creates an MCCFIInstruction:
5223// { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
5225 unsigned Reg,
5226 const StackOffset &Offset) {
5227 int64_t NumBytes, NumVGScaledBytes;
5229 NumVGScaledBytes);
5230 std::string CommentBuffer;
5231 llvm::raw_string_ostream Comment(CommentBuffer);
5232
5233 if (Reg == AArch64::SP)
5234 Comment << "sp";
5235 else if (Reg == AArch64::FP)
5236 Comment << "fp";
5237 else
5238 Comment << printReg(Reg, &TRI);
5239
5240 // Build up the expression (Reg + NumBytes + NumVGScaledBytes * AArch64::VG)
5241 SmallString<64> Expr;
5242 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
5243 Expr.push_back((uint8_t)(dwarf::DW_OP_breg0 + DwarfReg));
5244 Expr.push_back(0);
5245 appendVGScaledOffsetExpr(Expr, NumBytes, NumVGScaledBytes,
5246 TRI.getDwarfRegNum(AArch64::VG, true), Comment);
5247
5248 // Wrap this into DW_CFA_def_cfa.
5249 SmallString<64> DefCfaExpr;
5250 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
5251 uint8_t buffer[16];
5252 DefCfaExpr.append(buffer, buffer + encodeULEB128(Expr.size(), buffer));
5253 DefCfaExpr.append(Expr.str());
5254 return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
5255 Comment.str());
5256}
5257
5259 unsigned FrameReg, unsigned Reg,
5260 const StackOffset &Offset,
5261 bool LastAdjustmentWasScalable) {
5262 if (Offset.getScalable())
5263 return createDefCFAExpression(TRI, Reg, Offset);
5264
5265 if (FrameReg == Reg && !LastAdjustmentWasScalable)
5266 return MCCFIInstruction::cfiDefCfaOffset(nullptr, int(Offset.getFixed()));
5267
5268 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
5269 return MCCFIInstruction::cfiDefCfa(nullptr, DwarfReg, (int)Offset.getFixed());
5270}
5271
5273 unsigned Reg,
5274 const StackOffset &OffsetFromDefCFA) {
5275 int64_t NumBytes, NumVGScaledBytes;
5277 OffsetFromDefCFA, NumBytes, NumVGScaledBytes);
5278
5279 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
5280
5281 // Non-scalable offsets can use DW_CFA_offset directly.
5282 if (!NumVGScaledBytes)
5283 return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);
5284
5285 std::string CommentBuffer;
5286 llvm::raw_string_ostream Comment(CommentBuffer);
5287 Comment << printReg(Reg, &TRI) << " @ cfa";
5288
5289 // Build up expression (NumBytes + NumVGScaledBytes * AArch64::VG)
5290 SmallString<64> OffsetExpr;
5291 appendVGScaledOffsetExpr(OffsetExpr, NumBytes, NumVGScaledBytes,
5292 TRI.getDwarfRegNum(AArch64::VG, true), Comment);
5293
5294 // Wrap this into DW_CFA_expression
5295 SmallString<64> CfaExpr;
5296 CfaExpr.push_back(dwarf::DW_CFA_expression);
5297 uint8_t buffer[16];
5298 CfaExpr.append(buffer, buffer + encodeULEB128(DwarfReg, buffer));
5299 CfaExpr.append(buffer, buffer + encodeULEB128(OffsetExpr.size(), buffer));
5300 CfaExpr.append(OffsetExpr.str());
5301
5302 return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), SMLoc(),
5303 Comment.str());
5304}
5305
5306// Helper function to emit a frame offset adjustment from a given
5307// pointer (SrcReg), writing the result to DestReg. The caller must
5308// supply the exact add/sub opcode to use.
5311 const DebugLoc &DL, unsigned DestReg,
5312 unsigned SrcReg, int64_t Offset, unsigned Opc,
5313 const TargetInstrInfo *TII,
5314 MachineInstr::MIFlag Flag, bool NeedsWinCFI,
5315 bool *HasWinCFI, bool EmitCFAOffset,
5316 StackOffset CFAOffset, unsigned FrameReg) {
5317 int Sign = 1;
5318 unsigned MaxEncoding, ShiftSize;
5319 switch (Opc) {
5320 case AArch64::ADDXri:
5321 case AArch64::ADDSXri:
5322 case AArch64::SUBXri:
5323 case AArch64::SUBSXri:
5324 MaxEncoding = 0xfff;
5325 ShiftSize = 12;
5326 break;
5327 case AArch64::ADDVL_XXI:
5328 case AArch64::ADDPL_XXI:
5329 case AArch64::ADDSVL_XXI:
5330 case AArch64::ADDSPL_XXI:
5331 MaxEncoding = 31;
5332 ShiftSize = 0;
5333 if (Offset < 0) {
5334 MaxEncoding = 32;
5335 Sign = -1;
5336 Offset = -Offset;
5337 }
5338 break;
5339 default:
5340 llvm_unreachable("Unsupported opcode");
5341 }
5342
5343 // `Offset` can be in bytes or in "scalable bytes".
5344 int VScale = 1;
5345 if (Opc == AArch64::ADDVL_XXI || Opc == AArch64::ADDSVL_XXI)
5346 VScale = 16;
5347 else if (Opc == AArch64::ADDPL_XXI || Opc == AArch64::ADDSPL_XXI)
5348 VScale = 2;
5349
5350 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
5351 // scratch register. If DestReg is a virtual register, use it as the
5352 // scratch register; otherwise, create a new virtual register (to be
5353 // replaced by the scavenger at the end of PEI). That case can be optimized
5354 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
5355 // register can be loaded with offset%8 and the add/sub can use an extending
5356 // instruction with LSL#3.
5357 // Currently the function handles any offsets but generates a poor sequence
5358 // of code.
5359 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
5360
5361 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
5362 Register TmpReg = DestReg;
5363 if (TmpReg == AArch64::XZR)
5365 &AArch64::GPR64RegClass);
5366 do {
5367 uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
5368 unsigned LocalShiftSize = 0;
5369 if (ThisVal > MaxEncoding) {
5370 ThisVal = ThisVal >> ShiftSize;
5371 LocalShiftSize = ShiftSize;
5372 }
5373 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
5374 "Encoding cannot handle value that big");
5375
5376 Offset -= ThisVal << LocalShiftSize;
5377 if (Offset == 0)
5378 TmpReg = DestReg;
5379 auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
5380 .addReg(SrcReg)
5381 .addImm(Sign * (int)ThisVal);
5382 if (ShiftSize)
5383 MBI = MBI.addImm(
5385 MBI = MBI.setMIFlag(Flag);
5386
5387 auto Change =
5388 VScale == 1
5389 ? StackOffset::getFixed(ThisVal << LocalShiftSize)
5390 : StackOffset::getScalable(VScale * (ThisVal << LocalShiftSize));
5391 if (Sign == -1 || Opc == AArch64::SUBXri || Opc == AArch64::SUBSXri)
5392 CFAOffset += Change;
5393 else
5394 CFAOffset -= Change;
5395 if (EmitCFAOffset && DestReg == TmpReg) {
5396 MachineFunction &MF = *MBB.getParent();
5397 const TargetSubtargetInfo &STI = MF.getSubtarget();
5398 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
5399
5400 unsigned CFIIndex = MF.addFrameInst(
5401 createDefCFA(TRI, FrameReg, DestReg, CFAOffset, VScale != 1));
5402 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
5403 .addCFIIndex(CFIIndex)
5404 .setMIFlags(Flag);
5405 }
5406
5407 if (NeedsWinCFI) {
5408 assert(Sign == 1 && "SEH directives should always have a positive sign");
5409 int Imm = (int)(ThisVal << LocalShiftSize);
5410 if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
5411 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
5412 if (HasWinCFI)
5413 *HasWinCFI = true;
5414 if (Imm == 0)
5415 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
5416 else
5417 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
5418 .addImm(Imm)
5419 .setMIFlag(Flag);
5420 assert(Offset == 0 && "Expected remaining offset to be zero to "
5421 "emit a single SEH directive");
5422 } else if (DestReg == AArch64::SP) {
5423 if (HasWinCFI)
5424 *HasWinCFI = true;
5425 assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
5426 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
5427 .addImm(Imm)
5428 .setMIFlag(Flag);
5429 }
5430 }
5431
5432 SrcReg = TmpReg;
5433 } while (Offset);
5434}
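// Worked sketch of the chunking loop above for ADDXri (MaxEncoding = 0xfff,
// ShiftSize = 12), with a hypothetical offset of 0x123456 bytes:
//   iteration 1: ThisVal = 0x123, LocalShiftSize = 12 -> add xd, xn, #0x123, lsl #12
//   iteration 2: ThisVal = 0x456, LocalShiftSize = 0  -> add xd, xd, #0x456
// Larger offsets simply take more iterations, which is the "poor sequence"
// the FIXME above refers to.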
5435
5438 unsigned DestReg, unsigned SrcReg,
5440 MachineInstr::MIFlag Flag, bool SetNZCV,
5441 bool NeedsWinCFI, bool *HasWinCFI,
5442 bool EmitCFAOffset, StackOffset CFAOffset,
5443 unsigned FrameReg) {
5444  // If a function is marked as arm_locally_streaming, the runtime value of
5445  // vscale in the prologue/epilogue differs from its runtime value in the
5446  // function's body. To avoid having to consider multiple vscales,
5447 // we can use `addsvl` to allocate any scalable stack-slots, which under
5448 // most circumstances will be only locals, not callee-save slots.
5449 const Function &F = MBB.getParent()->getFunction();
5450 bool UseSVL = F.hasFnAttribute("aarch64_pstate_sm_body");
5451
5452 int64_t Bytes, NumPredicateVectors, NumDataVectors;
5454 Offset, Bytes, NumPredicateVectors, NumDataVectors);
5455
5456 // First emit non-scalable frame offsets, or a simple 'mov'.
5457 if (Bytes || (!Offset && SrcReg != DestReg)) {
5458 assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
5459 "SP increment/decrement not 8-byte aligned");
5460 unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
5461 if (Bytes < 0) {
5462 Bytes = -Bytes;
5463 Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
5464 }
5465 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
5466 NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
5467 FrameReg);
5468 CFAOffset += (Opc == AArch64::ADDXri || Opc == AArch64::ADDSXri)
5469 ? StackOffset::getFixed(-Bytes)
5470 : StackOffset::getFixed(Bytes);
5471 SrcReg = DestReg;
5472 FrameReg = DestReg;
5473 }
5474
5475 assert(!(SetNZCV && (NumPredicateVectors || NumDataVectors)) &&
5476 "SetNZCV not supported with SVE vectors");
5477 assert(!(NeedsWinCFI && (NumPredicateVectors || NumDataVectors)) &&
5478 "WinCFI not supported with SVE vectors");
5479
5480 if (NumDataVectors) {
5481 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
5482 UseSVL ? AArch64::ADDSVL_XXI : AArch64::ADDVL_XXI,
5483 TII, Flag, NeedsWinCFI, nullptr, EmitCFAOffset,
5484 CFAOffset, FrameReg);
5485 CFAOffset += StackOffset::getScalable(-NumDataVectors * 16);
5486 SrcReg = DestReg;
5487 }
5488
5489 if (NumPredicateVectors) {
5490 assert(DestReg != AArch64::SP && "Unaligned access to SP");
5491 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
5492 UseSVL ? AArch64::ADDSPL_XXI : AArch64::ADDPL_XXI,
5493 TII, Flag, NeedsWinCFI, nullptr, EmitCFAOffset,
5494 CFAOffset, FrameReg);
5495 }
5496}
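// A short sketch of the decomposition above (assumed values, for illustration
// only): an Offset of 32 fixed bytes plus two SVE data vectors would be
// expected to come out as
//   add   xD, xS, #32        // fixed part, via emitFrameOffsetAdj
//   addvl xD, xD, #2         // 2 * VL scalable bytes (ADDSVL_XXI if UseSVL)
// with any predicate-vector part handled by a trailing ADDPL/ADDSPL.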
5497
5498 MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
5499     MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
5500     MachineBasicBlock::iterator InsertPt, int FrameIndex,
5501 LiveIntervals *LIS, VirtRegMap *VRM) const {
5502 // This is a bit of a hack. Consider this instruction:
5503 //
5504 // %0 = COPY %sp; GPR64all:%0
5505 //
5506 // We explicitly chose GPR64all for the virtual register so such a copy might
5507 // be eliminated by RegisterCoalescer. However, that may not be possible, and
5508 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
5509 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
5510 //
5511 // To prevent that, we are going to constrain the %0 register class here.
5512 if (MI.isFullCopy()) {
5513 Register DstReg = MI.getOperand(0).getReg();
5514 Register SrcReg = MI.getOperand(1).getReg();
5515 if (SrcReg == AArch64::SP && DstReg.isVirtual()) {
5516 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
5517 return nullptr;
5518 }
5519 if (DstReg == AArch64::SP && SrcReg.isVirtual()) {
5520 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
5521 return nullptr;
5522 }
5523     // Nothing can be folded with a copy from/to NZCV.
5524 if (SrcReg == AArch64::NZCV || DstReg == AArch64::NZCV)
5525 return nullptr;
5526 }
5527
5528 // Handle the case where a copy is being spilled or filled but the source
5529 // and destination register class don't match. For example:
5530 //
5531 // %0 = COPY %xzr; GPR64common:%0
5532 //
5533 // In this case we can still safely fold away the COPY and generate the
5534 // following spill code:
5535 //
5536 // STRXui %xzr, %stack.0
5537 //
5538 // This also eliminates spilled cross register class COPYs (e.g. between x and
5539 // d regs) of the same size. For example:
5540 //
5541 // %0 = COPY %1; GPR64:%0, FPR64:%1
5542 //
5543 // will be filled as
5544 //
5545 // LDRDui %0, fi<#0>
5546 //
5547 // instead of
5548 //
5549 // LDRXui %Temp, fi<#0>
5550 // %0 = FMOV %Temp
5551 //
5552 if (MI.isCopy() && Ops.size() == 1 &&
5553 // Make sure we're only folding the explicit COPY defs/uses.
5554 (Ops[0] == 0 || Ops[0] == 1)) {
5555 bool IsSpill = Ops[0] == 0;
5556 bool IsFill = !IsSpill;
5557     const TargetRegisterInfo &TRI = getRegisterInfo();
5558     const MachineRegisterInfo &MRI = MF.getRegInfo();
5559 MachineBasicBlock &MBB = *MI.getParent();
5560 const MachineOperand &DstMO = MI.getOperand(0);
5561 const MachineOperand &SrcMO = MI.getOperand(1);
5562 Register DstReg = DstMO.getReg();
5563 Register SrcReg = SrcMO.getReg();
5564 // This is slightly expensive to compute for physical regs since
5565 // getMinimalPhysRegClass is slow.
5566 auto getRegClass = [&](unsigned Reg) {
5567 return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
5568 : TRI.getMinimalPhysRegClass(Reg);
5569 };
5570
5571 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
5572 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
5573 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
5574 "Mismatched register size in non subreg COPY");
5575 if (IsSpill)
5576 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
5577 getRegClass(SrcReg), &TRI, Register());
5578 else
5579 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
5580 getRegClass(DstReg), &TRI, Register());
5581 return &*--InsertPt;
5582 }
5583
5584 // Handle cases like spilling def of:
5585 //
5586 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
5587 //
5588 // where the physical register source can be widened and stored to the full
5589 // virtual reg destination stack slot, in this case producing:
5590 //
5591 // STRXui %xzr, %stack.0
5592 //
5593 if (IsSpill && DstMO.isUndef() && SrcReg == AArch64::WZR &&
5594 TRI.getRegSizeInBits(*getRegClass(DstReg)) == 64) {
5595 assert(SrcMO.getSubReg() == 0 &&
5596 "Unexpected subreg on physical register");
5597 storeRegToStackSlot(MBB, InsertPt, AArch64::XZR, SrcMO.isKill(),
5598 FrameIndex, &AArch64::GPR64RegClass, &TRI,
5599 Register());
5600 return &*--InsertPt;
5601 }
5602
5603 // Handle cases like filling use of:
5604 //
5605 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
5606 //
5607   // where we can load the full virtual reg source stack slot into the subreg
5608 // destination, in this case producing:
5609 //
5610 // LDRWui %0:sub_32<def,read-undef>, %stack.0
5611 //
5612 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
5613 const TargetRegisterClass *FillRC;
5614 switch (DstMO.getSubReg()) {
5615 default:
5616 FillRC = nullptr;
5617 break;
5618 case AArch64::sub_32:
5619 FillRC = &AArch64::GPR32RegClass;
5620 break;
5621 case AArch64::ssub:
5622 FillRC = &AArch64::FPR32RegClass;
5623 break;
5624 case AArch64::dsub:
5625 FillRC = &AArch64::FPR64RegClass;
5626 break;
5627 }
5628
5629 if (FillRC) {
5630 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
5631 TRI.getRegSizeInBits(*FillRC) &&
5632 "Mismatched regclass size on folded subreg COPY");
5633 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI,
5634 Register());
5635 MachineInstr &LoadMI = *--InsertPt;
5636 MachineOperand &LoadDst = LoadMI.getOperand(0);
5637 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
5638 LoadDst.setSubReg(DstMO.getSubReg());
5639 LoadDst.setIsUndef();
5640 return &LoadMI;
5641 }
5642 }
5643 }
5644
5645 // Cannot fold.
5646 return nullptr;
5647}
5648
5649 int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
5650                                     StackOffset &SOffset,
5651 bool *OutUseUnscaledOp,
5652 unsigned *OutUnscaledOp,
5653 int64_t *EmittableOffset) {
5654 // Set output values in case of early exit.
5655 if (EmittableOffset)
5656 *EmittableOffset = 0;
5657 if (OutUseUnscaledOp)
5658 *OutUseUnscaledOp = false;
5659 if (OutUnscaledOp)
5660 *OutUnscaledOp = 0;
5661
5662 // Exit early for structured vector spills/fills as they can't take an
5663 // immediate offset.
5664 switch (MI.getOpcode()) {
5665 default:
5666 break;
5667 case AArch64::LD1Rv1d:
5668 case AArch64::LD1Rv2s:
5669 case AArch64::LD1Rv2d:
5670 case AArch64::LD1Rv4h:
5671 case AArch64::LD1Rv4s:
5672 case AArch64::LD1Rv8b:
5673 case AArch64::LD1Rv8h:
5674 case AArch64::LD1Rv16b:
5675 case AArch64::LD1Twov2d:
5676 case AArch64::LD1Threev2d:
5677 case AArch64::LD1Fourv2d:
5678 case AArch64::LD1Twov1d:
5679 case AArch64::LD1Threev1d:
5680 case AArch64::LD1Fourv1d:
5681 case AArch64::ST1Twov2d:
5682 case AArch64::ST1Threev2d:
5683 case AArch64::ST1Fourv2d:
5684 case AArch64::ST1Twov1d:
5685 case AArch64::ST1Threev1d:
5686 case AArch64::ST1Fourv1d:
5687 case AArch64::ST1i8:
5688 case AArch64::ST1i16:
5689 case AArch64::ST1i32:
5690 case AArch64::ST1i64:
5691 case AArch64::IRG:
5692 case AArch64::IRGstack:
5693 case AArch64::STGloop:
5694 case AArch64::STZGloop:
5695     return AArch64FrameOffsetCannotUpdate;
5696   }
5697
5698 // Get the min/max offset and the scale.
5699 TypeSize ScaleValue(0U, false), Width(0U, false);
5700 int64_t MinOff, MaxOff;
5701 if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
5702 MaxOff))
5703 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
5704
5705 // Construct the complete offset.
5706 bool IsMulVL = ScaleValue.isScalable();
5707 unsigned Scale = ScaleValue.getKnownMinValue();
5708 int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
5709
5710 const MachineOperand &ImmOpnd =
5711 MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
5712 Offset += ImmOpnd.getImm() * Scale;
5713
5714 // If the offset doesn't match the scale, we rewrite the instruction to
5715 // use the unscaled instruction instead. Likewise, we switch to the unscaled
5716 // instruction if we have a negative offset and an unscaled op is available.
5717 std::optional<unsigned> UnscaledOp =
5718       AArch64InstrInfo::getUnscaledLdSt(MI.getOpcode());
5719   bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
5720 if (useUnscaledOp &&
5721 !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
5722 MaxOff))
5723 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
5724
5725 Scale = ScaleValue.getKnownMinValue();
5726 assert(IsMulVL == ScaleValue.isScalable() &&
5727 "Unscaled opcode has different value for scalable");
5728
5729 int64_t Remainder = Offset % Scale;
5730 assert(!(Remainder && useUnscaledOp) &&
5731 "Cannot have remainder when using unscaled op");
5732
5733 assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
5734 int64_t NewOffset = Offset / Scale;
5735 if (MinOff <= NewOffset && NewOffset <= MaxOff)
5736 Offset = Remainder;
5737 else {
5738 NewOffset = NewOffset < 0 ? MinOff : MaxOff;
5739 Offset = Offset - (NewOffset * Scale);
5740 }
5741
5742 if (EmittableOffset)
5743 *EmittableOffset = NewOffset;
5744 if (OutUseUnscaledOp)
5745 *OutUseUnscaledOp = useUnscaledOp;
5746 if (OutUnscaledOp && UnscaledOp)
5747 *OutUnscaledOp = *UnscaledOp;
5748
5749 if (IsMulVL)
5750 SOffset = StackOffset::get(SOffset.getFixed(), Offset);
5751 else
5752 SOffset = StackOffset::get(Offset, SOffset.getScalable());
5753   return AArch64FrameOffsetCanUpdate |
5754          (SOffset ? 0 : AArch64FrameOffsetIsLegal);
5755}
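// A worked example of the legalisation above (assumed opcodes): for an
// LDRXui (scale 8, unsigned 12-bit immediate), a combined offset of 20 is
// not a multiple of the scale, so if the unscaled variant (LDURXi) exists
// the function reports UseUnscaledOp with an emittable offset of 20; an
// offset of 24 instead stays scaled, yielding an emittable offset of 3 and
// a zero remainder folded back into SOffset.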
5756
5757 bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
5758                                     unsigned FrameReg, StackOffset &Offset,
5759 const AArch64InstrInfo *TII) {
5760 unsigned Opcode = MI.getOpcode();
5761 unsigned ImmIdx = FrameRegIdx + 1;
5762
5763 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
5764 Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
5765 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
5766 MI.getOperand(0).getReg(), FrameReg, Offset, TII,
5767 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
5768 MI.eraseFromParent();
5769 Offset = StackOffset();
5770 return true;
5771 }
5772
5773 int64_t NewOffset;
5774 unsigned UnscaledOp;
5775 bool UseUnscaledOp;
5776 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
5777 &UnscaledOp, &NewOffset);
5778 
5779   if (Status & AArch64FrameOffsetCanUpdate) {
5780     // Replace the FrameIndex with FrameReg.
5781 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
5782 if (UseUnscaledOp)
5783 MI.setDesc(TII->get(UnscaledOp));
5784
5785 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
5786 return !Offset;
5787 }
5788
5789 return false;
5790}
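// Usage sketch (assumed caller behaviour): frame-index elimination passes the
// materialised FrameReg and the residual StackOffset here; plain
// ADDXri/ADDSXri instructions are replaced outright via emitFrameOffset,
// while loads/stores get their immediate operand rewritten in place
// (possibly switching to the unscaled opcode). The function returns true only
// when the offset has been fully absorbed, leaving any remainder in Offset
// for the caller to materialise separately.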
5791
5792 void AArch64InstrInfo::insertNoop(MachineBasicBlock &MBB,
5793                                   MachineBasicBlock::iterator MI) const {
5794   DebugLoc DL;
5795 BuildMI(MBB, MI, DL, get(AArch64::HINT)).addImm(0);
5796}
5797
5798 MCInst AArch64InstrInfo::getNop() const {
5799   return MCInstBuilder(AArch64::HINT).addImm(0);
5800}
5801
5802// AArch64 supports MachineCombiner.
5803bool AArch64InstrInfo::useMachineCombiner() const { return true; }
5804
5805 // True when Opc sets the NZCV flags.
5806static bool isCombineInstrSettingFlag(unsigned Opc) {
5807 switch (Opc) {
5808 case AArch64::ADDSWrr:
5809 case AArch64::ADDSWri:
5810 case AArch64::ADDSXrr:
5811 case AArch64::ADDSXri:
5812 case AArch64::SUBSWrr:
5813 case AArch64::SUBSXrr:
5814 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
5815 case AArch64::SUBSWri:
5816 case AArch64::SUBSXri:
5817 return true;
5818 default:
5819 break;
5820 }
5821 return false;
5822}
5823
5824// 32b Opcodes that can be combined with a MUL
5825static bool isCombineInstrCandidate32(unsigned Opc) {
5826 switch (Opc) {
5827 case AArch64::ADDWrr:
5828 case AArch64::ADDWri:
5829 case AArch64::SUBWrr:
5830 case AArch64::ADDSWrr:
5831 case AArch64::ADDSWri:
5832 case AArch64::SUBSWrr:
5833 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
5834 case AArch64::SUBWri:
5835 case AArch64::SUBSWri:
5836 return true;
5837 default:
5838 break;
5839 }
5840 return false;
5841}
5842
5843// 64b Opcodes that can be combined with a MUL
5844static bool isCombineInstrCandidate64(unsigned Opc) {
5845 switch (Opc) {
5846 case AArch64::ADDXrr:
5847 case AArch64::ADDXri:
5848 case AArch64::SUBXrr:
5849 case AArch64::ADDSXrr:
5850 case AArch64::ADDSXri:
5851 case AArch64::SUBSXrr:
5852 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
5853 case AArch64::SUBXri:
5854 case AArch64::SUBSXri:
5855 case AArch64::ADDv8i8:
5856 case AArch64::ADDv16i8:
5857 case AArch64::ADDv4i16:
5858 case AArch64::ADDv8i16:
5859 case AArch64::ADDv2i32:
5860 case AArch64::ADDv4i32:
5861 case AArch64::SUBv8i8:
5862 case AArch64::SUBv16i8:
5863 case AArch64::SUBv4i16:
5864 case AArch64::SUBv8i16:
5865 case AArch64::SUBv2i32:
5866 case AArch64::SUBv4i32:
5867 return true;
5868 default:
5869 break;
5870 }
5871 return false;
5872}
5873
5874// FP Opcodes that can be combined with a FMUL.
5875static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
5876 switch (Inst.getOpcode()) {
5877 default:
5878 break;
5879 case AArch64::FADDHrr:
5880 case AArch64::FADDSrr:
5881 case AArch64::FADDDrr:
5882 case AArch64::FADDv4f16:
5883 case AArch64::FADDv8f16:
5884 case AArch64::FADDv2f32:
5885 case AArch64::FADDv2f64:
5886 case AArch64::FADDv4f32:
5887 case AArch64::FSUBHrr:
5888 case AArch64::FSUBSrr:
5889 case AArch64::FSUBDrr:
5890 case AArch64::FSUBv4f16:
5891 case AArch64::FSUBv8f16:
5892 case AArch64::FSUBv2f32:
5893 case AArch64::FSUBv2f64:
5894 case AArch64::FSUBv4f32:
5895     TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
5896     // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
5897     // the target options or if FADD/FSUB has the contract fast-math flag.
5898     return Options.UnsafeFPMath ||
5899            Options.AllowFPOpFusion == FPOpFusion::Fast ||
5900            Inst.getFlag(MachineInstr::FmContract);
5901     return true;
5902 }
5903 return false;
5904}
5905
5906// Opcodes that can be combined with a MUL
5907static bool isCombineInstrCandidate(unsigned Opc) {
5908   return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
5909 }
5910
5911//
5912// Utility routine that checks if \param MO is defined by an
5913// \param CombineOpc instruction in the basic block \param MBB
5914 static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
5915                        unsigned CombineOpc, unsigned ZeroReg = 0,
5916                        bool CheckZeroReg = false) {
5917   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
5918   MachineInstr *MI = nullptr;
5919
5920 if (MO.isReg() && MO.getReg().isVirtual())
5921 MI = MRI.getUniqueVRegDef(MO.getReg());
5922 // And it needs to be in the trace (otherwise, it won't have a depth).
5923 if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
5924 return false;
5925   // It must only be used by the user we combine with.
5926 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
5927 return false;
5928
5929 if (CheckZeroReg) {
5930 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
5931 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
5932            MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
5933 // The third input reg must be zero.
5934 if (MI->getOperand(3).getReg() != ZeroReg)
5935 return false;
5936 }
5937
5938 if (isCombineInstrSettingFlag(CombineOpc) &&
5939 MI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) == -1)
5940 return false;
5941
5942 return true;
5943}
5944
5945//
5946// Is \param MO defined by an integer multiply and can be combined?
5947 static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
5948                               unsigned MulOpc, unsigned ZeroReg) {
5949 return canCombine(MBB, MO, MulOpc, ZeroReg, true);
5950}
5951
5952//
5953// Is \param MO defined by a floating-point multiply and can be combined?
5954 static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
5955                                unsigned MulOpc) {
5956 return canCombine(MBB, MO, MulOpc);
5957}
5958
5959// TODO: There are many more machine instruction opcodes to match:
5960// 1. Other data types (integer, vectors)
5961// 2. Other math / logic operations (xor, or)
5962// 3. Other forms of the same operation (intrinsics and other variants)
5963 bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
5964                                                    bool Invert) const {
5965 if (Invert)
5966 return false;
5967 switch (Inst.getOpcode()) {
5968 // == Floating-point types ==
5969 // -- Floating-point instructions --
5970 case AArch64::FADDHrr:
5971 case AArch64::FADDSrr:
5972 case AArch64::FADDDrr:
5973 case AArch64::FMULHrr:
5974 case AArch64::FMULSrr:
5975 case AArch64::FMULDrr:
5976 case AArch64::FMULX16:
5977 case AArch64::FMULX32:
5978 case AArch64::FMULX64:
5979 // -- Advanced SIMD instructions --
5980 case AArch64::FADDv4f16:
5981 case AArch64::FADDv8f16:
5982 case AArch64::FADDv2f32:
5983 case AArch64::FADDv4f32:
5984 case AArch64::FADDv2f64:
5985 case AArch64::FMULv4f16:
5986 case AArch64::FMULv8f16:
5987 case AArch64::FMULv2f32:
5988 case AArch64::FMULv4f32:
5989 case AArch64::FMULv2f64:
5990 case AArch64::FMULXv4f16:
5991 case AArch64::FMULXv8f16:
5992 case AArch64::FMULXv2f32:
5993 case AArch64::FMULXv4f32:
5994 case AArch64::FMULXv2f64:
5995 // -- SVE instructions --
5996 // Opcodes FMULX_ZZZ_? don't exist because there is no unpredicated FMULX
5997 // in the SVE instruction set (though there are predicated ones).
5998 case AArch64::FADD_ZZZ_H:
5999 case AArch64::FADD_ZZZ_S:
6000 case AArch64::FADD_ZZZ_D:
6001 case AArch64::FMUL_ZZZ_H:
6002 case AArch64::FMUL_ZZZ_S:
6003 case AArch64::FMUL_ZZZ_D:
6004 return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath ||
6005          (Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
6006           Inst.getFlag(MachineInstr::MIFlag::FmNsz));
6007 
6008 // == Integer types ==
6009 // -- Base instructions --
6010 // Opcodes MULWrr and MULXrr don't exist because
6011 // `MUL <Wd>, <Wn>, <Wm>` and `MUL <Xd>, <Xn>, <Xm>` are aliases of
6012 // `MADD <Wd>, <Wn>, <Wm>, WZR` and `MADD <Xd>, <Xn>, <Xm>, XZR` respectively.
6013   // The machine-combiner does not support three-source-operand machine
6014   // instructions, so we cannot reassociate MULs.
6015 case AArch64::ADDWrr:
6016 case AArch64::ADDXrr:
6017 case AArch64::ANDWrr:
6018 case AArch64::ANDXrr:
6019 case AArch64::ORRWrr:
6020 case AArch64::ORRXrr:
6021 case AArch64::EORWrr:
6022 case AArch64::EORXrr:
6023 case AArch64::EONWrr:
6024 case AArch64::EONXrr:
6025 // -- Advanced SIMD instructions --
6026 // Opcodes MULv1i64 and MULv2i64 don't exist because there is no 64-bit MUL
6027 // in the Advanced SIMD instruction set.
6028 case AArch64::ADDv8i8:
6029 case AArch64::ADDv16i8:
6030 case AArch64::ADDv4i16:
6031 case AArch64::ADDv8i16:
6032 case AArch64::ADDv2i32:
6033 case AArch64::ADDv4i32:
6034 case AArch64::ADDv1i64:
6035 case AArch64::ADDv2i64:
6036 case AArch64::MULv8i8:
6037 case AArch64::MULv16i8:
6038 case AArch64::MULv4i16:
6039 case AArch64::MULv8i16:
6040 case AArch64::MULv2i32:
6041 case AArch64::MULv4i32:
6042 case AArch64::ANDv8i8:
6043 case AArch64::ANDv16i8:
6044 case AArch64::ORRv8i8:
6045 case AArch64::ORRv16i8:
6046 case AArch64::EORv8i8:
6047 case AArch64::EORv16i8:
6048 // -- SVE instructions --
6049 case AArch64::ADD_ZZZ_B:
6050 case AArch64::ADD_ZZZ_H:
6051 case AArch64::ADD_ZZZ_S:
6052 case AArch64::ADD_ZZZ_D:
6053 case AArch64::MUL_ZZZ_B:
6054 case AArch64::MUL_ZZZ_H:
6055 case AArch64::MUL_ZZZ_S:
6056 case AArch64::MUL_ZZZ_D:
6057 case AArch64::AND_ZZZ:
6058 case AArch64::ORR_ZZZ:
6059 case AArch64::EOR_ZZZ:
6060 return true;
6061
6062 default:
6063 return false;
6064 }
6065}
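// For context, a brief sketch of why this hook matters (assumed, generic
// machine-combiner behaviour): reporting an opcode as associative and
// commutative lets the combiner reassociate a serial chain such as
//   t1 = add a, b ; t2 = add t1, c ; t3 = add t2, d
// into the shallower
//   t1 = add a, b ; u  = add c, d ; t3 = add t1, u
// which shortens the dependence chain when the adds would otherwise be
// serialised.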
6066
6067/// Find instructions that can be turned into madd.
6068 static bool getMaddPatterns(MachineInstr &Root,
6069                             SmallVectorImpl<unsigned> &Patterns) {
6070 unsigned Opc = Root.getOpcode();
6071 MachineBasicBlock &MBB = *Root.getParent();
6072 bool Found = false;
6073
6074 if (!isCombineInstrCandidate(Opc))
6075 return false;
6076 if (isCombineInstrSettingFlag(Opc)) {
6077 int Cmp_NZCV =
6078 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
6079     // When NZCV is live, bail out.
6080 if (Cmp_NZCV == -1)
6081 return false;
6082 unsigned NewOpc = convertToNonFlagSettingOpc(Root);
6083     // When the opcode can't change, bail out.
6084 // CHECKME: do we miss any cases for opcode conversion?
6085 if (NewOpc == Opc)
6086 return false;
6087 Opc = NewOpc;
6088 }
6089
6090 auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
6091 unsigned Pattern) {
6092 if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
6093 Patterns.push_back(Pattern);
6094 Found = true;
6095 }
6096 };
6097
6098 auto setVFound = [&](int Opcode, int Operand, unsigned Pattern) {
6099 if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
6100 Patterns.push_back(Pattern);
6101 Found = true;
6102 }
6103 };
6104
6106
6107 switch (Opc) {
6108 default:
6109 break;
6110 case AArch64::ADDWrr:
6111 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6112 "ADDWrr does not have register operands");
6113 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
6114 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
6115 break;
6116 case AArch64::ADDXrr:
6117 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
6118 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
6119 break;
6120 case AArch64::SUBWrr:
6121 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
6122 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
6123 break;
6124 case AArch64::SUBXrr:
6125 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
6126 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
6127 break;
6128 case AArch64::ADDWri:
6129 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
6130 break;
6131 case AArch64::ADDXri:
6132 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
6133 break;
6134 case AArch64::SUBWri:
6135 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
6136 break;
6137 case AArch64::SUBXri:
6138 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
6139 break;
6140 case AArch64::ADDv8i8:
6141 setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
6142 setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
6143 break;
6144 case AArch64::ADDv16i8:
6145 setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
6146 setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
6147 break;
6148 case AArch64::ADDv4i16:
6149 setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
6150 setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
6151 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
6152 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
6153 break;
6154 case AArch64::ADDv8i16:
6155 setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
6156 setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
6157 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
6158 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
6159 break;
6160 case AArch64::ADDv2i32:
6161 setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
6162 setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
6163 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
6164 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
6165 break;
6166 case AArch64::ADDv4i32:
6167 setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
6168 setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
6169 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
6170 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
6171 break;
6172 case AArch64::SUBv8i8:
6173 setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
6174 setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
6175 break;
6176 case AArch64::SUBv16i8:
6177 setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
6178 setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
6179 break;
6180 case AArch64::SUBv4i16:
6181 setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
6182 setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
6183 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
6184 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
6185 break;
6186 case AArch64::SUBv8i16:
6187 setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
6188 setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
6189 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
6190 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
6191 break;
6192 case AArch64::SUBv2i32:
6193 setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
6194 setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
6195 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
6196 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
6197 break;
6198 case AArch64::SUBv4i32:
6199 setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
6200 setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
6201 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
6202 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
6203 break;
6204 }
6205 return Found;
6206}
6207/// Floating-Point Support
6208
6209/// Find instructions that can be turned into madd.
6210 static bool getFMAPatterns(MachineInstr &Root,
6211                            SmallVectorImpl<unsigned> &Patterns) {
6212
6213 if (!isCombineInstrCandidateFP(Root))
6214 return false;
6215
6216 MachineBasicBlock &MBB = *Root.getParent();
6217 bool Found = false;
6218
6219 auto Match = [&](int Opcode, int Operand, unsigned Pattern) -> bool {
6220 if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
6221 Patterns.push_back(Pattern);
6222 return true;
6223 }
6224 return false;
6225 };
6226
6228
6229 switch (Root.getOpcode()) {
6230 default:
6231 assert(false && "Unsupported FP instruction in combiner\n");
6232 break;
6233 case AArch64::FADDHrr:
6234 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6235 "FADDHrr does not have register operands");
6236
6237 Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
6238 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
6239 break;
6240 case AArch64::FADDSrr:
6241 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6242 "FADDSrr does not have register operands");
6243
6244 Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
6245 Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
6246
6247 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
6248 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
6249 break;
6250 case AArch64::FADDDrr:
6251 Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
6252 Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
6253
6254 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
6255 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
6256 break;
6257 case AArch64::FADDv4f16:
6258 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
6259 Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
6260
6261 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
6262 Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
6263 break;
6264 case AArch64::FADDv8f16:
6265 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
6266 Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
6267
6268 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
6269 Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
6270 break;
6271 case AArch64::FADDv2f32:
6272 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
6273 Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
6274
6275 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
6276 Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
6277 break;
6278 case AArch64::FADDv2f64:
6279 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
6280 Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
6281
6282 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
6283 Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
6284 break;
6285 case AArch64::FADDv4f32:
6286 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
6287 Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
6288
6289 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
6290 Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
6291 break;
6292 case AArch64::FSUBHrr:
6293 Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
6294 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
6295 Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
6296 break;
6297 case AArch64::FSUBSrr:
6298 Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
6299
6300 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
6301 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
6302
6303 Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
6304 break;
6305 case AArch64::FSUBDrr:
6306 Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
6307
6308 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
6309 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
6310
6311 Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
6312 break;
6313 case AArch64::FSUBv4f16:
6314 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
6315 Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
6316
6317 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
6318 Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
6319 break;
6320 case AArch64::FSUBv8f16:
6321 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
6322 Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
6323
6324 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
6325 Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
6326 break;
6327 case AArch64::FSUBv2f32:
6328 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
6329 Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
6330
6331 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
6332 Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
6333 break;
6334 case AArch64::FSUBv2f64:
6335 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
6336 Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
6337
6338 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
6339 Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
6340 break;
6341 case AArch64::FSUBv4f32:
6342 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
6343 Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
6344
6345 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
6346 Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
6347 break;
6348 }
6349 return Found;
6350}
6351
6352 static bool getFMULPatterns(MachineInstr &Root,
6353                             SmallVectorImpl<unsigned> &Patterns) {
6354 MachineBasicBlock &MBB = *Root.getParent();
6355 bool Found = false;
6356
6357 auto Match = [&](unsigned Opcode, int Operand, unsigned Pattern) -> bool {
6358     MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
6359     MachineOperand &MO = Root.getOperand(Operand);
6360 MachineInstr *MI = nullptr;
6361 if (MO.isReg() && MO.getReg().isVirtual())
6362 MI = MRI.getUniqueVRegDef(MO.getReg());
6363 // Ignore No-op COPYs in FMUL(COPY(DUP(..)))
6364 if (MI && MI->getOpcode() == TargetOpcode::COPY &&
6365 MI->getOperand(1).getReg().isVirtual())
6366 MI = MRI.getUniqueVRegDef(MI->getOperand(1).getReg());
6367 if (MI && MI->getOpcode() == Opcode) {
6368 Patterns.push_back(Pattern);
6369 return true;
6370 }
6371 return false;
6372 };
6373
6375
6376 switch (Root.getOpcode()) {
6377 default:
6378 return false;
6379 case AArch64::FMULv2f32:
6380 Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
6381 Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
6382 break;
6383 case AArch64::FMULv2f64:
6384 Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
6385 Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
6386 break;
6387 case AArch64::FMULv4f16:
6388 Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
6389 Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
6390 break;
6391 case AArch64::FMULv4f32:
6392 Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
6393 Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
6394 break;
6395 case AArch64::FMULv8f16:
6396 Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
6397 Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
6398 break;
6399 }
6400
6401 return Found;
6402}
6403
6404 static bool getFNEGPatterns(MachineInstr &Root,
6405                             SmallVectorImpl<unsigned> &Patterns) {
6406 unsigned Opc = Root.getOpcode();
6407 MachineBasicBlock &MBB = *Root.getParent();
6408   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
6409 
6410 auto Match = [&](unsigned Opcode, unsigned Pattern) -> bool {
6411 MachineOperand &MO = Root.getOperand(1);
6412 MachineInstr *MI = MRI.getUniqueVRegDef(MO.getReg());
6413 if (MI != nullptr && (MI->getOpcode() == Opcode) &&
6414 MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()) &&
6415         Root.getFlag(MachineInstr::MIFlag::FmContract) &&
6416         Root.getFlag(MachineInstr::MIFlag::FmNsz) &&
6417         MI->getFlag(MachineInstr::MIFlag::FmContract) &&
6418         MI->getFlag(MachineInstr::MIFlag::FmNsz)) {
6419 Patterns.push_back(Pattern);
6420 return true;
6421 }
6422 return false;
6423 };
6424
6425 switch (Opc) {
6426 default:
6427 break;
6428 case AArch64::FNEGDr:
6429 return Match(AArch64::FMADDDrrr, AArch64MachineCombinerPattern::FNMADD);
6430 case AArch64::FNEGSr:
6431 return Match(AArch64::FMADDSrrr, AArch64MachineCombinerPattern::FNMADD);
6432 }
6433
6434 return false;
6435}
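// A short sketch of the pattern matched above (illustrative): given
//   %t = FMADDSrrr a, b, c    ; t = a*b + c
//   %r = FNEGSr %t            ; r = -(a*b + c)
// the pair can be rewritten as a single FNMADDSrrr (r = -(a*b) - c), a
// rewrite the combiner only performs when the contract and nsz fast-math
// flags checked in Match are present on both instructions, since the two
// forms may differ in the sign of zero results.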
6436
6437/// Return true when a code sequence can improve throughput. It
6438/// should be called only for instructions in loops.
6439/// \param Pattern - combiner pattern
6441 switch (Pattern) {
6442 default:
6443 break;
6549 return true;
6550 } // end switch (Pattern)
6551 return false;
6552}
6553
6554/// Find other MI combine patterns.
6555 static bool getMiscPatterns(MachineInstr &Root,
6556                             SmallVectorImpl<unsigned> &Patterns) {
6557 // A - (B + C) ==> (A - B) - C or (A - C) - B
6558 unsigned Opc = Root.getOpcode();
6559 MachineBasicBlock &MBB = *Root.getParent();
6560
6561 switch (Opc) {
6562 case AArch64::SUBWrr:
6563 case AArch64::SUBSWrr:
6564 case AArch64::SUBXrr:
6565 case AArch64::SUBSXrr:
6566 // Found candidate root.
6567 break;
6568 default:
6569 return false;
6570 }
6571
6572 if (isCombineInstrSettingFlag(Opc) &&
6573 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) ==
6574 -1)
6575 return false;
6576
6577 if (canCombine(MBB, Root.getOperand(2), AArch64::ADDWrr) ||
6578 canCombine(MBB, Root.getOperand(2), AArch64::ADDSWrr) ||
6579 canCombine(MBB, Root.getOperand(2), AArch64::ADDXrr) ||
6580 canCombine(MBB, Root.getOperand(2), AArch64::ADDSXrr)) {
6581     Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP1);
6582     Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP2);
6583     return true;
6584 }
6585
6586 return false;
6587}
6588
6589 CombinerObjective
6590 AArch64InstrInfo::getCombinerObjective(unsigned Pattern) const {
6591   switch (Pattern) {
6592   case AArch64MachineCombinerPattern::SUBADD_OP1:
6593   case AArch64MachineCombinerPattern::SUBADD_OP2:
6594     return CombinerObjective::MustReduceDepth;
6595   default:
6596     return TargetInstrInfo::getCombinerObjective(Pattern);
6597   }
6598}
6599
6600/// Return true when there is potentially a faster code sequence for an
6601/// instruction chain ending in \p Root. All potential patterns are listed in
6602/// the \p Pattern vector. Pattern should be sorted in priority order since the
6603/// pattern evaluator stops checking as soon as it finds a faster sequence.
6604
6605 bool AArch64InstrInfo::getMachineCombinerPatterns(
6606     MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
6607 bool DoRegPressureReduce) const {
6608 // Integer patterns
6609 if (getMaddPatterns(Root, Patterns))
6610 return true;
6611 // Floating point patterns
6612 if (getFMULPatterns(Root, Patterns))
6613 return true;
6614 if (getFMAPatterns(Root, Patterns))
6615 return true;
6616 if (getFNEGPatterns(Root, Patterns))
6617 return true;
6618
6619 // Other patterns
6620 if (getMiscPatterns(Root, Patterns))
6621 return true;
6622
6623 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
6624 DoRegPressureReduce);
6625}
6626
6627 enum class FMAInstKind { Default, Indexed, Accumulator };
6628 /// genFusedMultiply - Generate fused multiply instructions.
6629/// This function supports both integer and floating point instructions.
6630/// A typical example:
6631/// F|MUL I=A,B,0
6632/// F|ADD R,I,C
6633/// ==> F|MADD R,A,B,C
6634/// \param MF Containing MachineFunction
6635/// \param MRI Register information
6636/// \param TII Target information
6637/// \param Root is the F|ADD instruction
6638/// \param [out] InsInstrs is a vector of machine instructions and will
6639/// contain the generated madd instruction
6640/// \param IdxMulOpd is index of operand in Root that is the result of
6641/// the F|MUL. In the example above IdxMulOpd is 1.
6642 /// \param MaddOpc the opcode of the f|madd instruction
6643/// \param RC Register class of operands
6644/// \param kind of fma instruction (addressing mode) to be generated
6645/// \param ReplacedAddend is the result register from the instruction
6646/// replacing the non-combined operand, if any.
6647static MachineInstr *
6648 genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
6649                  const TargetInstrInfo *TII, MachineInstr &Root,
6650 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
6651 unsigned MaddOpc, const TargetRegisterClass *RC,
6652 FMAInstKind kind = FMAInstKind::Default,
6653 const Register *ReplacedAddend = nullptr) {
6654 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
6655
6656 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
6657 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
6658 Register ResultReg = Root.getOperand(0).getReg();
6659 Register SrcReg0 = MUL->getOperand(1).getReg();
6660 bool Src0IsKill = MUL->getOperand(1).isKill();
6661 Register SrcReg1 = MUL->getOperand(2).getReg();
6662 bool Src1IsKill = MUL->getOperand(2).isKill();
6663
6664 Register SrcReg2;
6665 bool Src2IsKill;
6666 if (ReplacedAddend) {
6667     // If we just generated a new addend, we must be its only use.
6668 SrcReg2 = *ReplacedAddend;
6669 Src2IsKill = true;
6670 } else {
6671 SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
6672 Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
6673 }
6674
6675 if (ResultReg.isVirtual())
6676 MRI.constrainRegClass(ResultReg, RC);
6677 if (SrcReg0.isVirtual())
6678 MRI.constrainRegClass(SrcReg0, RC);
6679 if (SrcReg1.isVirtual())
6680 MRI.constrainRegClass(SrcReg1, RC);
6681 if (SrcReg2.isVirtual())
6682 MRI.constrainRegClass(SrcReg2, RC);
6683
6684   MachineInstrBuilder MIB;
6685   if (kind == FMAInstKind::Default)
6686 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
6687 .addReg(SrcReg0, getKillRegState(Src0IsKill))
6688 .addReg(SrcReg1, getKillRegState(Src1IsKill))
6689 .addReg(SrcReg2, getKillRegState(Src2IsKill));
6690 else if (kind == FMAInstKind::Indexed)
6691 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
6692 .addReg(SrcReg2, getKillRegState(Src2IsKill))
6693 .addReg(SrcReg0, getKillRegState(Src0IsKill))
6694 .addReg(SrcReg1, getKillRegState(Src1IsKill))
6695 .addImm(MUL->getOperand(3).getImm());
6696 else if (kind == FMAInstKind::Accumulator)
6697 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
6698 .addReg(SrcReg2, getKillRegState(Src2IsKill))
6699 .addReg(SrcReg0, getKillRegState(Src0IsKill))
6700 .addReg(SrcReg1, getKillRegState(Src1IsKill));
6701 else
6702 assert(false && "Invalid FMA instruction kind \n");
6703   // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
6704 InsInstrs.push_back(MIB);
6705 return MUL;
6706}
6707
6708static MachineInstr *
6709 genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI,
6710                const TargetInstrInfo *TII, MachineInstr &Root,
6711                SmallVectorImpl<MachineInstr *> &InsInstrs) {
6712 MachineInstr *MAD = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
6713
6714 unsigned Opc = 0;
6715 const TargetRegisterClass *RC = MRI.getRegClass(MAD->getOperand(0).getReg());
6716 if (AArch64::FPR32RegClass.hasSubClassEq(RC))
6717 Opc = AArch64::FNMADDSrrr;
6718 else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
6719 Opc = AArch64::FNMADDDrrr;
6720 else
6721 return nullptr;
6722
6723 Register ResultReg = Root.getOperand(0).getReg();
6724 Register SrcReg0 = MAD->getOperand(1).getReg();
6725 Register SrcReg1 = MAD->getOperand(2).getReg();
6726 Register SrcReg2 = MAD->getOperand(3).getReg();
6727 bool Src0IsKill = MAD->getOperand(1).isKill();
6728 bool Src1IsKill = MAD->getOperand(2).isKill();
6729 bool Src2IsKill = MAD->getOperand(3).isKill();
6730 if (ResultReg.isVirtual())
6731 MRI.constrainRegClass(ResultReg, RC);
6732 if (SrcReg0.isVirtual())
6733 MRI.constrainRegClass(SrcReg0, RC);
6734 if (SrcReg1.isVirtual())
6735 MRI.constrainRegClass(SrcReg1, RC);
6736 if (SrcReg2.isVirtual())
6737 MRI.constrainRegClass(SrcReg2, RC);
6738
6739   MachineInstrBuilder MIB =
6740       BuildMI(MF, MIMetadata(Root), TII->get(Opc), ResultReg)
6741 .addReg(SrcReg0, getKillRegState(Src0IsKill))
6742 .addReg(SrcReg1, getKillRegState(Src1IsKill))
6743 .addReg(SrcReg2, getKillRegState(Src2IsKill));
6744 InsInstrs.push_back(MIB);
6745
6746 return MAD;
6747}
6748
6749/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
6750static MachineInstr *
6751 genIndexedMultiply(MachineInstr &Root,
6752                    SmallVectorImpl<MachineInstr *> &InsInstrs,
6753                    unsigned IdxDupOp, unsigned MulOpc,
6754                    const TargetRegisterClass *RC, MachineRegisterInfo &MRI) {
6755 assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
6756 "Invalid index of FMUL operand");
6757
6758 MachineFunction &MF = *Root.getMF();
6759   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
6760 
6761 MachineInstr *Dup =
6762 MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
6763
6764 if (Dup->getOpcode() == TargetOpcode::COPY)
6765 Dup = MRI.getUniqueVRegDef(Dup->getOperand(1).getReg());
6766
6767 Register DupSrcReg = Dup->getOperand(1).getReg();
6768 MRI.clearKillFlags(DupSrcReg);
6769 MRI.constrainRegClass(DupSrcReg, RC);
6770
6771 unsigned DupSrcLane = Dup->getOperand(2).getImm();
6772
6773 unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
6774 MachineOperand &MulOp = Root.getOperand(IdxMulOp);
6775
6776 Register ResultReg = Root.getOperand(0).getReg();
6777
6778   MachineInstrBuilder MIB;
6779   MIB = BuildMI(MF, MIMetadata(Root), TII->get(MulOpc), ResultReg)
6780 .add(MulOp)
6781 .addReg(DupSrcReg)
6782 .addImm(DupSrcLane);
6783
6784 InsInstrs.push_back(MIB);
6785 return &Root;
6786}
6787
6788/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
6789/// instructions.
6790///
6791/// \see genFusedMultiply
6792 static MachineInstr *genFusedMultiplyAcc(
6793     MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
6794     MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
6795     unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
6796 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
6797 FMAInstKind::Accumulator);
6798}
6799
6800/// genNeg - Helper to generate an intermediate negation of the second operand
6801/// of Root
6802 static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI,
6803                        const TargetInstrInfo *TII, MachineInstr &Root,
6804                        SmallVectorImpl<MachineInstr *> &InsInstrs,
6805 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg,
6806 unsigned MnegOpc, const TargetRegisterClass *RC) {
6807 Register NewVR = MRI.createVirtualRegister(RC);
6808   MachineInstrBuilder MIB =
6809       BuildMI(MF, MIMetadata(Root), TII->get(MnegOpc), NewVR)
6810 .add(Root.getOperand(2));
6811 InsInstrs.push_back(MIB);
6812
6813 assert(InstrIdxForVirtReg.empty());
6814 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
6815
6816 return NewVR;
6817}
6818
6819/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
6820/// instructions with an additional negation of the accumulator
6821 static MachineInstr *genFusedMultiplyAccNeg(
6822     MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
6823     MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
6824     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
6825 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
6826 assert(IdxMulOpd == 1);
6827
6828 Register NewVR =
6829 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
6830 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
6831 FMAInstKind::Accumulator, &NewVR);
6832}
6833
6834/// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
6835/// instructions.
6836///
6837/// \see genFusedMultiply
6838 static MachineInstr *genFusedMultiplyIdx(
6839     MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
6840     MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
6841     unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
6842 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
6843 FMAInstKind::Indexed);
6844}
6845
6846 /// genFusedMultiplyIdxNeg - Helper to generate fused multiply accumulate
6847 /// instructions with an additional negation of the accumulator
6848 static MachineInstr *genFusedMultiplyIdxNeg(
6849     MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
6850     MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
6851     DenseMap<unsigned, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
6852 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
6853 assert(IdxMulOpd == 1);
6854
6855 Register NewVR =
6856 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
6857
6858 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
6859 FMAInstKind::Indexed, &NewVR);
6860}
6861
6862/// genMaddR - Generate madd instruction and combine mul and add using
6863/// an extra virtual register
6864/// Example - an ADD intermediate needs to be stored in a register:
6865/// MUL I=A,B,0
6866/// ADD R,I,Imm
6867/// ==> ORR V, ZR, Imm
6868/// ==> MADD R,A,B,V
6869/// \param MF Containing MachineFunction
6870/// \param MRI Register information
6871/// \param TII Target information
6872/// \param Root is the ADD instruction
6873/// \param [out] InsInstrs is a vector of machine instructions and will
6874/// contain the generated madd instruction
6875/// \param IdxMulOpd is index of operand in Root that is the result of
6876/// the MUL. In the example above IdxMulOpd is 1.
6877 /// \param MaddOpc the opcode of the madd instruction
6878/// \param VR is a virtual register that holds the value of an ADD operand
6879/// (V in the example above).
6880/// \param RC Register class of operands
6881 static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
6882                               const TargetInstrInfo *TII, MachineInstr &Root,
6883                               SmallVectorImpl<MachineInstr *> &InsInstrs,
6884 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
6885 const TargetRegisterClass *RC) {
6886 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
6887
6888 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
6889 Register ResultReg = Root.getOperand(0).getReg();
6890 Register SrcReg0 = MUL->getOperand(1).getReg();
6891 bool Src0IsKill = MUL->getOperand(1).isKill();
6892 Register SrcReg1 = MUL->getOperand(2).getReg();
6893 bool Src1IsKill = MUL->getOperand(2).isKill();
6894
6895 if (ResultReg.isVirtual())
6896 MRI.constrainRegClass(ResultReg, RC);
6897 if (SrcReg0.isVirtual())
6898 MRI.constrainRegClass(SrcReg0, RC);
6899 if (SrcReg1.isVirtual())
6900 MRI.constrainRegClass(SrcReg1, RC);
6902 MRI.constrainRegClass(VR, RC);
6903
6904   MachineInstrBuilder MIB =
6905       BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
6906 .addReg(SrcReg0, getKillRegState(Src0IsKill))
6907 .addReg(SrcReg1, getKillRegState(Src1IsKill))
6908 .addReg(VR);
6909 // Insert the MADD
6910 InsInstrs.push_back(MIB);
6911 return MUL;
6912}
6913
6914/// Do the following transformation
6915/// A - (B + C) ==> (A - B) - C
6916/// A - (B + C) ==> (A - C) - B
6917static void
6918 genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI,
6919                  const TargetInstrInfo *TII, MachineInstr &Root,
6920                  SmallVectorImpl<MachineInstr *> &InsInstrs,
6921                  SmallVectorImpl<MachineInstr *> &DelInstrs,
6922 unsigned IdxOpd1,
6923 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) {
6924 assert(IdxOpd1 == 1 || IdxOpd1 == 2);
6925 unsigned IdxOtherOpd = IdxOpd1 == 1 ? 2 : 1;
6926 MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
6927
6928 Register ResultReg = Root.getOperand(0).getReg();
6929 Register RegA = Root.getOperand(1).getReg();
6930 bool RegAIsKill = Root.getOperand(1).isKill();
6931 Register RegB = AddMI->getOperand(IdxOpd1).getReg();
6932 bool RegBIsKill = AddMI->getOperand(IdxOpd1).isKill();
6933 Register RegC = AddMI->getOperand(IdxOtherOpd).getReg();
6934 bool RegCIsKill = AddMI->getOperand(IdxOtherOpd).isKill();
6935 Register NewVR = MRI.createVirtualRegister(MRI.getRegClass(RegA));
6936
6937 unsigned Opcode = Root.getOpcode();
6938 if (Opcode == AArch64::SUBSWrr)
6939 Opcode = AArch64::SUBWrr;
6940 else if (Opcode == AArch64::SUBSXrr)
6941 Opcode = AArch64::SUBXrr;
6942 else
6943 assert((Opcode == AArch64::SUBWrr || Opcode == AArch64::SUBXrr) &&
6944 "Unexpected instruction opcode.");
6945
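  // Assumed rationale for the flag handling below: reassociating
  // A - (B + C) into (A - B) - C can introduce intermediate wrap-around that
  // the original expression did not have, so the nsw/nuw flags must not be
  // carried over to the replacement instructions.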
6946 uint32_t Flags = Root.mergeFlagsWith(*AddMI);
6947 Flags &= ~MachineInstr::NoSWrap;
6948 Flags &= ~MachineInstr::NoUWrap;
6949
6950 MachineInstrBuilder MIB1 =
6951 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), NewVR)
6952 .addReg(RegA, getKillRegState(RegAIsKill))
6953 .addReg(RegB, getKillRegState(RegBIsKill))
6954 .setMIFlags(Flags);
6955 MachineInstrBuilder MIB2 =
6956 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), ResultReg)
6957 .addReg(NewVR, getKillRegState(true))
6958 .addReg(RegC, getKillRegState(RegCIsKill))
6959 .setMIFlags(Flags);
6960
6961 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
6962 InsInstrs.push_back(MIB1);
6963 InsInstrs.push_back(MIB2);
6964 DelInstrs.push_back(AddMI);
6965 DelInstrs.push_back(&Root);
6966}
6967
6968/// When getMachineCombinerPatterns() finds potential patterns,
6969/// this function generates the instructions that could replace the
6970/// original code sequence
6971 void AArch64InstrInfo::genAlternativeCodeSequence(
6972     MachineInstr &Root, unsigned Pattern,
6973     SmallVectorImpl<MachineInstr *> &InsInstrs,
6974     SmallVectorImpl<MachineInstr *> &DelInstrs,
6975 DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
6976 MachineBasicBlock &MBB = *Root.getParent();
6977   MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
6978   MachineFunction &MF = *MBB.getParent();
6979   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
6980 
6981 MachineInstr *MUL = nullptr;
6982 const TargetRegisterClass *RC;
6983 unsigned Opc;
6984 switch (Pattern) {
6985 default:
6986 // Reassociate instructions.
6987     TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
6988                                                 DelInstrs, InstrIdxForVirtReg);
6989 return;
6990   case AArch64MachineCombinerPattern::SUBADD_OP1:
6991     // A - (B + C)
6992 // ==> (A - B) - C
6993 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 1,
6994 InstrIdxForVirtReg);
6995 return;
6996   case AArch64MachineCombinerPattern::SUBADD_OP2:
6997     // A - (B + C)
6998 // ==> (A - C) - B
6999 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 2,
7000 InstrIdxForVirtReg);
7001 return;
7004 // MUL I=A,B,0
7005 // ADD R,I,C
7006 // ==> MADD R,A,B,C
7007 // --- Create(MADD);
7009 Opc = AArch64::MADDWrrr;
7010 RC = &AArch64::GPR32RegClass;
7011 } else {
7012 Opc = AArch64::MADDXrrr;
7013 RC = &AArch64::GPR64RegClass;
7014 }
7015 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7016 break;
7019 // MUL I=A,B,0
7020 // ADD R,C,I
7021 // ==> MADD R,A,B,C
7022 // --- Create(MADD);
7024 Opc = AArch64::MADDWrrr;
7025 RC = &AArch64::GPR32RegClass;
7026 } else {
7027 Opc = AArch64::MADDXrrr;
7028 RC = &AArch64::GPR64RegClass;
7029 }
7030 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7031 break;
7034 // MUL I=A,B,0
7035 // ADD R,I,Imm
7036 // ==> MOV V, Imm
7037 // ==> MADD R,A,B,V
7038 // --- Create(MADD);
7039 const TargetRegisterClass *OrrRC;
7040 unsigned BitSize, OrrOpc, ZeroReg;
7042 OrrOpc = AArch64::ORRWri;
7043 OrrRC = &AArch64::GPR32spRegClass;
7044 BitSize = 32;
7045 ZeroReg = AArch64::WZR;
7046 Opc = AArch64::MADDWrrr;
7047 RC = &AArch64::GPR32RegClass;
7048 } else {
7049 OrrOpc = AArch64::ORRXri;
7050 OrrRC = &AArch64::GPR64spRegClass;
7051 BitSize = 64;
7052 ZeroReg = AArch64::XZR;
7053 Opc = AArch64::MADDXrrr;
7054 RC = &AArch64::GPR64RegClass;
7055 }
7056 Register NewVR = MRI.createVirtualRegister(OrrRC);
7057 uint64_t Imm = Root.getOperand(2).getImm();
7058
7059 if (Root.getOperand(3).isImm()) {
7060 unsigned Val = Root.getOperand(3).getImm();
7061 Imm = Imm << Val;
7062 }
7063 uint64_t UImm = SignExtend64(Imm, BitSize);
7064 // The immediate can be composed via a single instruction.
7066 AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
7067 if (Insn.size() != 1)
7068 return;
7069 auto MovI = Insn.begin();
7071 // MOV is an alias for one of three instructions: movz, movn, and orr.
7072 if (MovI->Opcode == OrrOpc)
7073 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(OrrOpc), NewVR)
7074 .addReg(ZeroReg)
7075 .addImm(MovI->Op2);
7076 else {
7077 if (BitSize == 32)
7078 assert((MovI->Opcode == AArch64::MOVNWi ||
7079 MovI->Opcode == AArch64::MOVZWi) &&
7080 "Expected opcode");
7081 else
7082 assert((MovI->Opcode == AArch64::MOVNXi ||
7083 MovI->Opcode == AArch64::MOVZXi) &&
7084 "Expected opcode");
7085 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(MovI->Opcode), NewVR)
7086 .addImm(MovI->Op1)
7087 .addImm(MovI->Op2);
7088 }
7089 InsInstrs.push_back(MIB1);
7090 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7091 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
7092 break;
7093 }
7096 // MUL I=A,B,0
7097 // SUB R,I, C
7098 // ==> SUB V, 0, C
7099 // ==> MADD R,A,B,V // = -C + A*B
7100 // --- Create(MADD);
7101 const TargetRegisterClass *SubRC;
7102 unsigned SubOpc, ZeroReg;
7104 SubOpc = AArch64::SUBWrr;
7105 SubRC = &AArch64::GPR32spRegClass;
7106 ZeroReg = AArch64::WZR;
7107 Opc = AArch64::MADDWrrr;
7108 RC = &AArch64::GPR32RegClass;
7109 } else {
7110 SubOpc = AArch64::SUBXrr;
7111 SubRC = &AArch64::GPR64spRegClass;
7112 ZeroReg = AArch64::XZR;
7113 Opc = AArch64::MADDXrrr;
7114 RC = &AArch64::GPR64RegClass;
7115 }
7116 Register NewVR = MRI.createVirtualRegister(SubRC);
7117 // SUB NewVR, 0, C
7118 MachineInstrBuilder MIB1 =
7119 BuildMI(MF, MIMetadata(Root), TII->get(SubOpc), NewVR)
7120 .addReg(ZeroReg)
7121 .add(Root.getOperand(2));
7122 InsInstrs.push_back(MIB1);
7123 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7124 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
7125 break;
7126 }
7129 // MUL I=A,B,0
7130 // SUB R,C,I
7131 // ==> MSUB R,A,B,C (computes C - A*B)
7132 // --- Create(MSUB);
7134 Opc = AArch64::MSUBWrrr;
7135 RC = &AArch64::GPR32RegClass;
7136 } else {
7137 Opc = AArch64::MSUBXrrr;
7138 RC = &AArch64::GPR64RegClass;
7139 }
7140 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7141 break;
7144 // MUL I=A,B,0
7145 // SUB R,I, Imm
7146 // ==> MOV V, -Imm
7147 // ==> MADD R,A,B,V // = -Imm + A*B
7148 // --- Create(MADD);
7149 const TargetRegisterClass *OrrRC;
7150 unsigned BitSize, OrrOpc, ZeroReg;
7152 OrrOpc = AArch64::ORRWri;
7153 OrrRC = &AArch64::GPR32spRegClass;
7154 BitSize = 32;
7155 ZeroReg = AArch64::WZR;
7156 Opc = AArch64::MADDWrrr;
7157 RC = &AArch64::GPR32RegClass;
7158 } else {
7159 OrrOpc = AArch64::ORRXri;
7160 OrrRC = &AArch64::GPR64spRegClass;
7161 BitSize = 64;
7162 ZeroReg = AArch64::XZR;
7163 Opc = AArch64::MADDXrrr;
7164 RC = &AArch64::GPR64RegClass;
7165 }
7166 Register NewVR = MRI.createVirtualRegister(OrrRC);
7167 uint64_t Imm = Root.getOperand(2).getImm();
7168 if (Root.getOperand(3).isImm()) {
7169 unsigned Val = Root.getOperand(3).getImm();
7170 Imm = Imm << Val;
7171 }
7172 uint64_t UImm = SignExtend64(-Imm, BitSize);
7173 // The immediate can be composed via a single instruction.
7175 AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
7176 if (Insn.size() != 1)
7177 return;
7178 auto MovI = Insn.begin();
7180 // MOV is an alias for one of three instructions: movz, movn, and orr.
7181 if (MovI->Opcode == OrrOpc)
7182 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(OrrOpc), NewVR)
7183 .addReg(ZeroReg)
7184 .addImm(MovI->Op2);
7185 else {
7186 if (BitSize == 32)
7187 assert((MovI->Opcode == AArch64::MOVNWi ||
7188 MovI->Opcode == AArch64::MOVZWi) &&
7189 "Expected opcode");
7190 else
7191 assert((MovI->Opcode == AArch64::MOVNXi ||
7192 MovI->Opcode == AArch64::MOVZXi) &&
7193 "Expected opcode");
7194 MIB1 = BuildMI(MF, MIMetadata(Root), TII->get(MovI->Opcode), NewVR)
7195 .addImm(MovI->Op1)
7196 .addImm(MovI->Op2);
7197 }
7198 InsInstrs.push_back(MIB1);
7199 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7200 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
7201 break;
7202 }
7203
7205 Opc = AArch64::MLAv8i8;
7206 RC = &AArch64::FPR64RegClass;
7207 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7208 break;
7210 Opc = AArch64::MLAv8i8;
7211 RC = &AArch64::FPR64RegClass;
7212 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7213 break;
7215 Opc = AArch64::MLAv16i8;
7216 RC = &AArch64::FPR128RegClass;
7217 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7218 break;
7220 Opc = AArch64::MLAv16i8;
7221 RC = &AArch64::FPR128RegClass;
7222 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7223 break;
7225 Opc = AArch64::MLAv4i16;
7226 RC = &AArch64::FPR64RegClass;
7227 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7228 break;
7230 Opc = AArch64::MLAv4i16;
7231 RC = &AArch64::FPR64RegClass;
7232 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7233 break;
7235 Opc = AArch64::MLAv8i16;
7236 RC = &AArch64::FPR128RegClass;
7237 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7238 break;
7240 Opc = AArch64::MLAv8i16;
7241 RC = &AArch64::FPR128RegClass;
7242 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7243 break;
7245 Opc = AArch64::MLAv2i32;
7246 RC = &AArch64::FPR64RegClass;
7247 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7248 break;
7250 Opc = AArch64::MLAv2i32;
7251 RC = &AArch64::FPR64RegClass;
7252 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7253 break;
7255 Opc = AArch64::MLAv4i32;
7256 RC = &AArch64::FPR128RegClass;
7257 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7258 break;
7260 Opc = AArch64::MLAv4i32;
7261 RC = &AArch64::FPR128RegClass;
7262 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7263 break;
7264
7266 Opc = AArch64::MLAv8i8;
7267 RC = &AArch64::FPR64RegClass;
7268 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7269 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
7270 RC);
7271 break;
7273 Opc = AArch64::MLSv8i8;
7274 RC = &AArch64::FPR64RegClass;
7275 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7276 break;
7278 Opc = AArch64::MLAv16i8;
7279 RC = &AArch64::FPR128RegClass;
7280 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7281 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
7282 RC);
7283 break;
7285 Opc = AArch64::MLSv16i8;
7286 RC = &AArch64::FPR128RegClass;
7287 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7288 break;
7290 Opc = AArch64::MLAv4i16;
7291 RC = &AArch64::FPR64RegClass;
7292 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7293 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
7294 RC);
7295 break;
7297 Opc = AArch64::MLSv4i16;
7298 RC = &AArch64::FPR64RegClass;
7299 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7300 break;
7302 Opc = AArch64::MLAv8i16;
7303 RC = &AArch64::FPR128RegClass;
7304 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7305 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
7306 RC);
7307 break;
7309 Opc = AArch64::MLSv8i16;
7310 RC = &AArch64::FPR128RegClass;
7311 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7312 break;
7314 Opc = AArch64::MLAv2i32;
7315 RC = &AArch64::FPR64RegClass;
7316 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7317 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
7318 RC);
7319 break;
7321 Opc = AArch64::MLSv2i32;
7322 RC = &AArch64::FPR64RegClass;
7323 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7324 break;
7326 Opc = AArch64::MLAv4i32;
7327 RC = &AArch64::FPR128RegClass;
7328 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
7329 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
7330 RC);
7331 break;
7333 Opc = AArch64::MLSv4i32;
7334 RC = &AArch64::FPR128RegClass;
7335 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7336 break;
7337
7339 Opc = AArch64::MLAv4i16_indexed;
7340 RC = &AArch64::FPR64RegClass;
7341 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7342 break;
7344 Opc = AArch64::MLAv4i16_indexed;
7345 RC = &AArch64::FPR64RegClass;
7346 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7347 break;
7349 Opc = AArch64::MLAv8i16_indexed;
7350 RC = &AArch64::FPR128RegClass;
7351 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7352 break;
7354 Opc = AArch64::MLAv8i16_indexed;
7355 RC = &AArch64::FPR128RegClass;
7356 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7357 break;
7359 Opc = AArch64::MLAv2i32_indexed;
7360 RC = &AArch64::FPR64RegClass;
7361 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7362 break;
7364 Opc = AArch64::MLAv2i32_indexed;
7365 RC = &AArch64::FPR64RegClass;
7366 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7367 break;
7369 Opc = AArch64::MLAv4i32_indexed;
7370 RC = &AArch64::FPR128RegClass;
7371 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7372 break;
7374 Opc = AArch64::MLAv4i32_indexed;
7375 RC = &AArch64::FPR128RegClass;
7376 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7377 break;
7378
7380 Opc = AArch64::MLAv4i16_indexed;
7381 RC = &AArch64::FPR64RegClass;
7382 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7383 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
7384 RC);
7385 break;
7387 Opc = AArch64::MLSv4i16_indexed;
7388 RC = &AArch64::FPR64RegClass;
7389 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7390 break;
7392 Opc = AArch64::MLAv8i16_indexed;
7393 RC = &AArch64::FPR128RegClass;
7394 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7395 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
7396 RC);
7397 break;
7399 Opc = AArch64::MLSv8i16_indexed;
7400 RC = &AArch64::FPR128RegClass;
7401 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7402 break;
7404 Opc = AArch64::MLAv2i32_indexed;
7405 RC = &AArch64::FPR64RegClass;
7406 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7407 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
7408 RC);
7409 break;
7411 Opc = AArch64::MLSv2i32_indexed;
7412 RC = &AArch64::FPR64RegClass;
7413 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7414 break;
7416 Opc = AArch64::MLAv4i32_indexed;
7417 RC = &AArch64::FPR128RegClass;
7418 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
7419 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
7420 RC);
7421 break;
7423 Opc = AArch64::MLSv4i32_indexed;
7424 RC = &AArch64::FPR128RegClass;
7425 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7426 break;
7427
7428 // Floating Point Support
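  // The cases below fuse floating-point multiplies with the adds/subtracts
  // that consume them: scalar patterns map to FMADD/FMSUB/FNMADD/FNMSUB,
  // vector patterns map to FMLA/FMLS (plain or by-element "indexed" forms),
  // and some variants first negate an operand with FNEGv* so an FMLA can be
  // reused.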
7430 Opc = AArch64::FMADDHrrr;
7431 RC = &AArch64::FPR16RegClass;
7432 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7433 break;
7435 Opc = AArch64::FMADDSrrr;
7436 RC = &AArch64::FPR32RegClass;
7437 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7438 break;
7440 Opc = AArch64::FMADDDrrr;
7441 RC = &AArch64::FPR64RegClass;
7442 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7443 break;
7444
7446 Opc = AArch64::FMADDHrrr;
7447 RC = &AArch64::FPR16RegClass;
7448 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7449 break;
7451 Opc = AArch64::FMADDSrrr;
7452 RC = &AArch64::FPR32RegClass;
7453 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7454 break;
7456 Opc = AArch64::FMADDDrrr;
7457 RC = &AArch64::FPR64RegClass;
7458 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7459 break;
7460
7462 Opc = AArch64::FMLAv1i32_indexed;
7463 RC = &AArch64::FPR32RegClass;
7464 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7465 FMAInstKind::Indexed);
7466 break;
7468 Opc = AArch64::FMLAv1i32_indexed;
7469 RC = &AArch64::FPR32RegClass;
7470 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7471 FMAInstKind::Indexed);
7472 break;
7473
7475 Opc = AArch64::FMLAv1i64_indexed;
7476 RC = &AArch64::FPR64RegClass;
7477 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7478 FMAInstKind::Indexed);
7479 break;
7481 Opc = AArch64::FMLAv1i64_indexed;
7482 RC = &AArch64::FPR64RegClass;
7483 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7484 FMAInstKind::Indexed);
7485 break;
7486
7488 RC = &AArch64::FPR64RegClass;
7489 Opc = AArch64::FMLAv4i16_indexed;
7490 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7491 FMAInstKind::Indexed);
7492 break;
7494 RC = &AArch64::FPR64RegClass;
7495 Opc = AArch64::FMLAv4f16;
7496 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7497 FMAInstKind::Accumulator);
7498 break;
7500 RC = &AArch64::FPR64RegClass;
7501 Opc = AArch64::FMLAv4i16_indexed;
7502 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7503 FMAInstKind::Indexed);
7504 break;
7506 RC = &AArch64::FPR64RegClass;
7507 Opc = AArch64::FMLAv4f16;
7508 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7509 FMAInstKind::Accumulator);
7510 break;
7511
7514 RC = &AArch64::FPR64RegClass;
7516 Opc = AArch64::FMLAv2i32_indexed;
7517 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7518 FMAInstKind::Indexed);
7519 } else {
7520 Opc = AArch64::FMLAv2f32;
7521 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7522 FMAInstKind::Accumulator);
7523 }
7524 break;
7527 RC = &AArch64::FPR64RegClass;
7529 Opc = AArch64::FMLAv2i32_indexed;
7530 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7531 FMAInstKind::Indexed);
7532 } else {
7533 Opc = AArch64::FMLAv2f32;
7534 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7535 FMAInstKind::Accumulator);
7536 }
7537 break;
7538
7540 RC = &AArch64::FPR128RegClass;
7541 Opc = AArch64::FMLAv8i16_indexed;
7542 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7543 FMAInstKind::Indexed);
7544 break;
7546 RC = &AArch64::FPR128RegClass;
7547 Opc = AArch64::FMLAv8f16;
7548 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7549 FMAInstKind::Accumulator);
7550 break;
7552 RC = &AArch64::FPR128RegClass;
7553 Opc = AArch64::FMLAv8i16_indexed;
7554 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7555 FMAInstKind::Indexed);
7556 break;
7558 RC = &AArch64::FPR128RegClass;
7559 Opc = AArch64::FMLAv8f16;
7560 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7561 FMAInstKind::Accumulator);
7562 break;
7563
7566 RC = &AArch64::FPR128RegClass;
7568 Opc = AArch64::FMLAv2i64_indexed;
7569 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7570 FMAInstKind::Indexed);
7571 } else {
7572 Opc = AArch64::FMLAv2f64;
7573 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7574 FMAInstKind::Accumulator);
7575 }
7576 break;
7579 RC = &AArch64::FPR128RegClass;
7581 Opc = AArch64::FMLAv2i64_indexed;
7582 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7583 FMAInstKind::Indexed);
7584 } else {
7585 Opc = AArch64::FMLAv2f64;
7586 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7587 FMAInstKind::Accumulator);
7588 }
7589 break;
7590
7593 RC = &AArch64::FPR128RegClass;
7595 Opc = AArch64::FMLAv4i32_indexed;
7596 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7597 FMAInstKind::Indexed);
7598 } else {
7599 Opc = AArch64::FMLAv4f32;
7600 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7601 FMAInstKind::Accumulator);
7602 }
7603 break;
7604
7607 RC = &AArch64::FPR128RegClass;
7609 Opc = AArch64::FMLAv4i32_indexed;
7610 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7611 FMAInstKind::Indexed);
7612 } else {
7613 Opc = AArch64::FMLAv4f32;
7614 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7615 FMAInstKind::Accumulator);
7616 }
7617 break;
7618
7620 Opc = AArch64::FNMSUBHrrr;
7621 RC = &AArch64::FPR16RegClass;
7622 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7623 break;
7625 Opc = AArch64::FNMSUBSrrr;
7626 RC = &AArch64::FPR32RegClass;
7627 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7628 break;
7630 Opc = AArch64::FNMSUBDrrr;
7631 RC = &AArch64::FPR64RegClass;
7632 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7633 break;
7634
7636 Opc = AArch64::FNMADDHrrr;
7637 RC = &AArch64::FPR16RegClass;
7638 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7639 break;
7641 Opc = AArch64::FNMADDSrrr;
7642 RC = &AArch64::FPR32RegClass;
7643 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7644 break;
7646 Opc = AArch64::FNMADDDrrr;
7647 RC = &AArch64::FPR64RegClass;
7648 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
7649 break;
7650
7652 Opc = AArch64::FMSUBHrrr;
7653 RC = &AArch64::FPR16RegClass;
7654 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7655 break;
7657 Opc = AArch64::FMSUBSrrr;
7658 RC = &AArch64::FPR32RegClass;
7659 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7660 break;
7662 Opc = AArch64::FMSUBDrrr;
7663 RC = &AArch64::FPR64RegClass;
7664 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
7665 break;
7666
7668 Opc = AArch64::FMLSv1i32_indexed;
7669 RC = &AArch64::FPR32RegClass;
7670 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7671 FMAInstKind::Indexed);
7672 break;
7673
7675 Opc = AArch64::FMLSv1i64_indexed;
7676 RC = &AArch64::FPR64RegClass;
7677 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7678 FMAInstKind::Indexed);
7679 break;
7680
7683 RC = &AArch64::FPR64RegClass;
7684 Register NewVR = MRI.createVirtualRegister(RC);
7685 MachineInstrBuilder MIB1 =
7686 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f16), NewVR)
7687 .add(Root.getOperand(2));
7688 InsInstrs.push_back(MIB1);
7689 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7691 Opc = AArch64::FMLAv4f16;
7692 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7693 FMAInstKind::Accumulator, &NewVR);
7694 } else {
7695 Opc = AArch64::FMLAv4i16_indexed;
7696 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7697 FMAInstKind::Indexed, &NewVR);
7698 }
7699 break;
7700 }
7702 RC = &AArch64::FPR64RegClass;
7703 Opc = AArch64::FMLSv4f16;
7704 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7705 FMAInstKind::Accumulator);
7706 break;
7708 RC = &AArch64::FPR64RegClass;
7709 Opc = AArch64::FMLSv4i16_indexed;
7710 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7711 FMAInstKind::Indexed);
7712 break;
7713
7716 RC = &AArch64::FPR64RegClass;
7718 Opc = AArch64::FMLSv2i32_indexed;
7719 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7720 FMAInstKind::Indexed);
7721 } else {
7722 Opc = AArch64::FMLSv2f32;
7723 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7724 FMAInstKind::Accumulator);
7725 }
7726 break;
7727
7730 RC = &AArch64::FPR128RegClass;
7731 Register NewVR = MRI.createVirtualRegister(RC);
7732 MachineInstrBuilder MIB1 =
7733 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv8f16), NewVR)
7734 .add(Root.getOperand(2));
7735 InsInstrs.push_back(MIB1);
7736 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7738 Opc = AArch64::FMLAv8f16;
7739 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7740 FMAInstKind::Accumulator, &NewVR);
7741 } else {
7742 Opc = AArch64::FMLAv8i16_indexed;
7743 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7744 FMAInstKind::Indexed, &NewVR);
7745 }
7746 break;
7747 }
7749 RC = &AArch64::FPR128RegClass;
7750 Opc = AArch64::FMLSv8f16;
7751 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7752 FMAInstKind::Accumulator);
7753 break;
7755 RC = &AArch64::FPR128RegClass;
7756 Opc = AArch64::FMLSv8i16_indexed;
7757 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7758 FMAInstKind::Indexed);
7759 break;
7760
7763 RC = &AArch64::FPR128RegClass;
7765 Opc = AArch64::FMLSv2i64_indexed;
7766 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7767 FMAInstKind::Indexed);
7768 } else {
7769 Opc = AArch64::FMLSv2f64;
7770 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7771 FMAInstKind::Accumulator);
7772 }
7773 break;
7774
7777 RC = &AArch64::FPR128RegClass;
7779 Opc = AArch64::FMLSv4i32_indexed;
7780 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7781 FMAInstKind::Indexed);
7782 } else {
7783 Opc = AArch64::FMLSv4f32;
7784 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
7785 FMAInstKind::Accumulator);
7786 }
7787 break;
7790 RC = &AArch64::FPR64RegClass;
7791 Register NewVR = MRI.createVirtualRegister(RC);
7792 MachineInstrBuilder MIB1 =
7793 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f32), NewVR)
7794 .add(Root.getOperand(2));
7795 InsInstrs.push_back(MIB1);
7796 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7798 Opc = AArch64::FMLAv2i32_indexed;
7799 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7800 FMAInstKind::Indexed, &NewVR);
7801 } else {
7802 Opc = AArch64::FMLAv2f32;
7803 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7804 FMAInstKind::Accumulator, &NewVR);
7805 }
7806 break;
7807 }
7810 RC = &AArch64::FPR128RegClass;
7811 Register NewVR = MRI.createVirtualRegister(RC);
7812 MachineInstrBuilder MIB1 =
7813 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f32), NewVR)
7814 .add(Root.getOperand(2));
7815 InsInstrs.push_back(MIB1);
7816 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7818 Opc = AArch64::FMLAv4i32_indexed;
7819 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7820 FMAInstKind::Indexed, &NewVR);
7821 } else {
7822 Opc = AArch64::FMLAv4f32;
7823 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7824 FMAInstKind::Accumulator, &NewVR);
7825 }
7826 break;
7827 }
7830 RC = &AArch64::FPR128RegClass;
7831 Register NewVR = MRI.createVirtualRegister(RC);
7832 MachineInstrBuilder MIB1 =
7833 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f64), NewVR)
7834 .add(Root.getOperand(2));
7835 InsInstrs.push_back(MIB1);
7836 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
7838 Opc = AArch64::FMLAv2i64_indexed;
7839 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7840 FMAInstKind::Indexed, &NewVR);
7841 } else {
7842 Opc = AArch64::FMLAv2f64;
7843 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
7844 FMAInstKind::Accumulator, &NewVR);
7845 }
7846 break;
7847 }
7850 unsigned IdxDupOp =
7852 : 2;
7853 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
7854 &AArch64::FPR128RegClass, MRI);
7855 break;
7856 }
7859 unsigned IdxDupOp =
7861 : 2;
7862 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
7863 &AArch64::FPR128RegClass, MRI);
7864 break;
7865 }
7868 unsigned IdxDupOp =
7870 : 2;
7871 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
7872 &AArch64::FPR128_loRegClass, MRI);
7873 break;
7874 }
7877 unsigned IdxDupOp =
7879 : 2;
7880 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
7881 &AArch64::FPR128RegClass, MRI);
7882 break;
7883 }
7886 unsigned IdxDupOp =
7888 : 2;
7889 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
7890 &AArch64::FPR128_loRegClass, MRI);
7891 break;
7892 }
7894 MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
7895 break;
7896 }
7897
7898 } // end switch (Pattern)
7899 // Record MUL and ADD/SUB for deletion
7900 if (MUL)
7901 DelInstrs.push_back(MUL);
7902 DelInstrs.push_back(&Root);
7903
7904 // Set the flags on the inserted instructions to be the merged flags of the
7905 // instructions that we have combined.
7906 uint32_t Flags = Root.getFlags();
7907 if (MUL)
7908 Flags = Root.mergeFlagsWith(*MUL);
7909 for (auto *MI : InsInstrs)
7910 MI->setFlags(Flags);
7911}
7912
7913/// Replace a csinc-branch sequence with a simple conditional branch
7914///
7915/// Examples:
7916/// 1. \code
7917/// csinc w9, wzr, wzr, <condition code>
7918/// tbnz w9, #0, 0x44
7919/// \endcode
7920/// to
7921/// \code
7922/// b.<inverted condition code>
7923/// \endcode
7924///
7925/// 2. \code
7926/// csinc w9, wzr, wzr, <condition code>
7927/// tbz w9, #0, 0x44
7928/// \endcode
7929/// to
7930/// \code
7931/// b.<condition code>
7932/// \endcode
7933///
7934/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
7935/// compare's constant operand is power of 2.
7936///
7937/// Examples:
7938/// \code
7939/// and w8, w8, #0x400
7940/// cbnz w8, L1
7941/// \endcode
7942/// to
7943/// \code
7944/// tbnz w8, #10, L1
7945/// \endcode
7946///
7947/// \param MI Conditional Branch
7948/// \return True when the simple conditional branch is generated
7949///
7950bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
 7951  bool IsNegativeBranch = false;
7952 bool IsTestAndBranch = false;
7953 unsigned TargetBBInMI = 0;
7954 switch (MI.getOpcode()) {
7955 default:
7956 llvm_unreachable("Unknown branch instruction?");
7957 case AArch64::Bcc:
7958 return false;
7959 case AArch64::CBZW:
7960 case AArch64::CBZX:
7961 TargetBBInMI = 1;
7962 break;
7963 case AArch64::CBNZW:
7964 case AArch64::CBNZX:
7965 TargetBBInMI = 1;
7966 IsNegativeBranch = true;
7967 break;
7968 case AArch64::TBZW:
7969 case AArch64::TBZX:
7970 TargetBBInMI = 2;
7971 IsTestAndBranch = true;
7972 break;
7973 case AArch64::TBNZW:
7974 case AArch64::TBNZX:
7975 TargetBBInMI = 2;
7976 IsNegativeBranch = true;
7977 IsTestAndBranch = true;
7978 break;
7979 }
7980 // So we increment a zero register and test for bits other
7981 // than bit 0? Conservatively bail out in case the verifier
7982 // missed this case.
7983 if (IsTestAndBranch && MI.getOperand(1).getImm())
7984 return false;
7985
7986 // Find Definition.
 7987  assert(MI.getParent() && "Incomplete machine instruction\n");
7988 MachineBasicBlock *MBB = MI.getParent();
7989 MachineFunction *MF = MBB->getParent();
 7990  MachineRegisterInfo *MRI = &MF->getRegInfo();
 7991  Register VReg = MI.getOperand(0).getReg();
7992 if (!VReg.isVirtual())
7993 return false;
7994
7995 MachineInstr *DefMI = MRI->getVRegDef(VReg);
7996
7997 // Look through COPY instructions to find definition.
7998 while (DefMI->isCopy()) {
7999 Register CopyVReg = DefMI->getOperand(1).getReg();
8000 if (!MRI->hasOneNonDBGUse(CopyVReg))
8001 return false;
8002 if (!MRI->hasOneDef(CopyVReg))
8003 return false;
8004 DefMI = MRI->getVRegDef(CopyVReg);
8005 }
8006
8007 switch (DefMI->getOpcode()) {
8008 default:
8009 return false;
8010 // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
8011 case AArch64::ANDWri:
8012 case AArch64::ANDXri: {
8013 if (IsTestAndBranch)
8014 return false;
8015 if (DefMI->getParent() != MBB)
8016 return false;
8017 if (!MRI->hasOneNonDBGUse(VReg))
8018 return false;
8019
8020 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
 8021    uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
 8022        DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
8023 if (!isPowerOf2_64(Mask))
8024 return false;
8025
 8026    MachineOperand &MO = DefMI->getOperand(1);
 8027    Register NewReg = MO.getReg();
8028 if (!NewReg.isVirtual())
8029 return false;
8030
8031 assert(!MRI->def_empty(NewReg) && "Register must be defined.");
8032
8033 MachineBasicBlock &RefToMBB = *MBB;
8034 MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
8035 DebugLoc DL = MI.getDebugLoc();
8036 unsigned Imm = Log2_64(Mask);
8037 unsigned Opc = (Imm < 32)
8038 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
8039 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
8040 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
8041 .addReg(NewReg)
8042 .addImm(Imm)
8043 .addMBB(TBB);
 8044    // Register lives on to the TB(N)Z now.
8045 MO.setIsKill(false);
8046
 8047    // For immediates smaller than 32, we need to use the 32-bit
 8048    // variant (W) in all cases, because the 64-bit variant cannot
 8049    // encode them.
 8050    // Therefore, if the input register is 64-bit, we need to take its
 8051    // 32-bit sub-register.
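    // For example, "and x8, x8, #0x8" + "cbnz x8, L1" becomes
    // "tbnz w8, #3, L1", testing bit 3 on the 32-bit sub-register of x8.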
8052 if (!Is32Bit && Imm < 32)
8053 NewMI->getOperand(0).setSubReg(AArch64::sub_32);
8054 MI.eraseFromParent();
8055 return true;
8056 }
8057 // Look for CSINC
8058 case AArch64::CSINCWr:
8059 case AArch64::CSINCXr: {
8060 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
8061 DefMI->getOperand(2).getReg() == AArch64::WZR) &&
8062 !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
8063 DefMI->getOperand(2).getReg() == AArch64::XZR))
8064 return false;
8065
8066 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
8067 true) != -1)
8068 return false;
8069
 8070    AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
 8071    // Convert only when the condition code is not modified between
8072 // the CSINC and the branch. The CC may be used by other
8073 // instructions in between.
 8074    if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
 8075      return false;
8076 MachineBasicBlock &RefToMBB = *MBB;
8077 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
8078 DebugLoc DL = MI.getDebugLoc();
8079 if (IsNegativeBranch)
 8080      CC = AArch64CC::getInvertedCondCode(CC);
 8081    BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
8082 MI.eraseFromParent();
8083 return true;
8084 }
8085 }
8086}
8087
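// Split an operand's target flags into its addressing "fragment" (e.g. MO_PAGE,
// MO_PAGEOFF, MO_HI12) and the remaining modifier bits (e.g. MO_NC, MO_GOT);
// for example, MO_PAGEOFF | MO_NC decomposes into (MO_PAGEOFF, MO_NC).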
8088std::pair<unsigned, unsigned>
8089AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
 8090  const unsigned Mask = AArch64II::MO_FRAGMENT;
8091 return std::make_pair(TF & Mask, TF & ~Mask);
8092}
8093
8094ArrayRef<std::pair<unsigned, const char *>>
8095AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
 8096  using namespace AArch64II;
8097
8098 static const std::pair<unsigned, const char *> TargetFlags[] = {
8099 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
8100 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
8101 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
8102 {MO_HI12, "aarch64-hi12"}};
8103 return ArrayRef(TargetFlags);
8104}
8105
8106ArrayRef<std::pair<unsigned, const char *>>
8107AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
 8108  using namespace AArch64II;
8109
8110 static const std::pair<unsigned, const char *> TargetFlags[] = {
8111 {MO_COFFSTUB, "aarch64-coffstub"},
8112 {MO_GOT, "aarch64-got"},
8113 {MO_NC, "aarch64-nc"},
8114 {MO_S, "aarch64-s"},
8115 {MO_TLS, "aarch64-tls"},
8116 {MO_DLLIMPORT, "aarch64-dllimport"},
8117 {MO_PREL, "aarch64-prel"},
8118 {MO_TAGGED, "aarch64-tagged"},
8119 {MO_ARM64EC_CALLMANGLE, "aarch64-arm64ec-callmangle"},
8120 };
8121 return ArrayRef(TargetFlags);
8122}
8123
8124ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
8125AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
 8126  static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
8127 {{MOSuppressPair, "aarch64-suppress-pair"},
8128 {MOStridedAccess, "aarch64-strided-access"}};
8129 return ArrayRef(TargetFlags);
8130}
8131
8132/// Constants defining how certain sequences should be outlined.
8133/// This encompasses how an outlined function should be called, and what kind of
8134/// frame should be emitted for that outlined function.
8135///
8136/// \p MachineOutlinerDefault implies that the function should be called with
8137/// a save and restore of LR to the stack.
8138///
8139/// That is,
8140///
8141/// I1 Save LR OUTLINED_FUNCTION:
8142/// I2 --> BL OUTLINED_FUNCTION I1
8143/// I3 Restore LR I2
8144/// I3
8145/// RET
8146///
8147/// * Call construction overhead: 3 (save + BL + restore)
8148/// * Frame construction overhead: 1 (ret)
8149/// * Requires stack fixups? Yes
8150///
8151/// \p MachineOutlinerTailCall implies that the function is being created from
8152/// a sequence of instructions ending in a return.
8153///
8154/// That is,
8155///
8156/// I1 OUTLINED_FUNCTION:
8157/// I2 --> B OUTLINED_FUNCTION I1
8158/// RET I2
8159/// RET
8160///
8161/// * Call construction overhead: 1 (B)
8162/// * Frame construction overhead: 0 (Return included in sequence)
8163/// * Requires stack fixups? No
8164///
8165/// \p MachineOutlinerNoLRSave implies that the function should be called using
8166/// a BL instruction, but doesn't require LR to be saved and restored. This
8167/// happens when LR is known to be dead.
8168///
8169/// That is,
8170///
8171/// I1 OUTLINED_FUNCTION:
8172/// I2 --> BL OUTLINED_FUNCTION I1
8173/// I3 I2
8174/// I3
8175/// RET
8176///
8177/// * Call construction overhead: 1 (BL)
8178/// * Frame construction overhead: 1 (RET)
8179/// * Requires stack fixups? No
8180///
8181/// \p MachineOutlinerThunk implies that the function is being created from
8182/// a sequence of instructions ending in a call. The outlined function is
8183/// called with a BL instruction, and the outlined function tail-calls the
8184/// original call destination.
8185///
8186/// That is,
8187///
8188/// I1 OUTLINED_FUNCTION:
8189/// I2 --> BL OUTLINED_FUNCTION I1
8190/// BL f I2
8191/// B f
8192/// * Call construction overhead: 1 (BL)
8193/// * Frame construction overhead: 0
8194/// * Requires stack fixups? No
8195///
8196/// \p MachineOutlinerRegSave implies that the function should be called with a
8197/// save and restore of LR to an available register. This allows us to avoid
8198/// stack fixups. Note that this outlining variant is compatible with the
8199/// NoLRSave case.
8200///
8201/// That is,
8202///
8203/// I1 Save LR OUTLINED_FUNCTION:
8204/// I2 --> BL OUTLINED_FUNCTION I1
8205/// I3 Restore LR I2
8206/// I3
8207/// RET
8208///
8209/// * Call construction overhead: 3 (save + BL + restore)
8210/// * Frame construction overhead: 1 (ret)
8211/// * Requires stack fixups? No
8212enum MachineOutlinerClass {
 8213  MachineOutlinerDefault, /// Emit a save, restore, call, and return.
8214 MachineOutlinerTailCall, /// Only emit a branch.
8215 MachineOutlinerNoLRSave, /// Emit a call and return.
8216 MachineOutlinerThunk, /// Emit a call and tail-call.
8217 MachineOutlinerRegSave /// Same as default, but save to a register.
8218};
8219
8220enum MachineOutlinerMBBFlags {
 8221  LRUnavailableSomewhere = 0x2,
 8222  HasCalls = 0x4,
 8223  UnsafeRegsDead = 0x8
8224};
8225
8226Register
8227AArch64InstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
 8228  MachineFunction *MF = C.getMF();
 8229  const TargetRegisterInfo &TRI = getRegisterInfo();
 8230  const AArch64RegisterInfo *ARI =
8231 static_cast<const AArch64RegisterInfo *>(&TRI);
8232 // Check if there is an available register across the sequence that we can
8233 // use.
8234 for (unsigned Reg : AArch64::GPR64RegClass) {
8235 if (!ARI->isReservedReg(*MF, Reg) &&
8236 Reg != AArch64::LR && // LR is not reserved, but don't use it.
8237 Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
8238 Reg != AArch64::X17 && // Ditto for X17.
8239 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
8240 C.isAvailableInsideSeq(Reg, TRI))
8241 return Reg;
8242 }
8243 return Register();
8244}
8245
8246static bool
8247outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a,
 8248                                         const outliner::Candidate &b) {
8249 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
8250 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
8251
8252 return MFIa->shouldSignReturnAddress(false) == MFIb->shouldSignReturnAddress(false) &&
8253 MFIa->shouldSignReturnAddress(true) == MFIb->shouldSignReturnAddress(true);
8254}
8255
8256static bool
8257outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a,
 8258                                       const outliner::Candidate &b) {
8259 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
8260 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
8261
8262 return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
8263}
8264
8265static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a,
 8266                                                const outliner::Candidate &b) {
 8267  const AArch64Subtarget &SubtargetA =
 8268      a.getMF()->getSubtarget<AArch64Subtarget>();
 8269  const AArch64Subtarget &SubtargetB =
8270 b.getMF()->getSubtarget<AArch64Subtarget>();
8271 return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
8272}
8273
8274std::optional<outliner::OutlinedFunction>
8275AArch64InstrInfo::getOutliningCandidateInfo(
 8276    std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
8277 unsigned SequenceSize = 0;
8278 for (auto &MI : RepeatedSequenceLocs[0])
8279 SequenceSize += getInstSizeInBytes(MI);
8280
8281 unsigned NumBytesToCreateFrame = 0;
8282
8283 // We only allow outlining for functions having exactly matching return
8284 // address signing attributes, i.e., all share the same value for the
8285 // attribute "sign-return-address" and all share the same type of key they
8286 // are signed with.
 8287  // Additionally we require all functions to simultaneously either support
8288 // v8.3a features or not. Otherwise an outlined function could get signed
8289 // using dedicated v8.3 instructions and a call from a function that doesn't
8290 // support v8.3 instructions would therefore be invalid.
8291 if (std::adjacent_find(
8292 RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
8293 [](const outliner::Candidate &a, const outliner::Candidate &b) {
8294 // Return true if a and b are non-equal w.r.t. return address
8295 // signing or support of v8.3a features
8296 if (outliningCandidatesSigningScopeConsensus(a, b) &&
8297 outliningCandidatesSigningKeyConsensus(a, b) &&
8298 outliningCandidatesV8_3OpsConsensus(a, b)) {
8299 return false;
8300 }
8301 return true;
8302 }) != RepeatedSequenceLocs.end()) {
8303 return std::nullopt;
8304 }
8305
 8306  // Since at this point all candidates agree on their return address signing,
 8307  // picking just one is fine. If the candidate functions potentially sign their
 8308  // return addresses, the outlined function should do the same. Note that in
 8309  // the case of "sign-return-address"="non-leaf" this is an assumption: it is
 8310  // not certain that the outlined function will have to sign its return
 8311  // address, but that decision is made later, once the decision to outline
 8312  // has already been made.
8313 // The same holds for the number of additional instructions we need: On
8314 // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
8315 // necessary. However, at this point we don't know if the outlined function
8316 // will have a RET instruction so we assume the worst.
8317 const TargetRegisterInfo &TRI = getRegisterInfo();
8318 // Performing a tail call may require extra checks when PAuth is enabled.
8319 // If PAuth is disabled, set it to zero for uniformity.
8320 unsigned NumBytesToCheckLRInTCEpilogue = 0;
8321 if (RepeatedSequenceLocs[0]
8322 .getMF()
8323 ->getInfo<AArch64FunctionInfo>()
8324 ->shouldSignReturnAddress(true)) {
 8325    // One PAC and one AUT instruction.
8326 NumBytesToCreateFrame += 8;
8327
8328 // PAuth is enabled - set extra tail call cost, if any.
8329 auto LRCheckMethod = Subtarget.getAuthenticatedLRCheckMethod();
8330 NumBytesToCheckLRInTCEpilogue =
8332 // Checking the authenticated LR value may significantly impact
8333 // SequenceSize, so account for it for more precise results.
8334 if (isTailCallReturnInst(RepeatedSequenceLocs[0].back()))
8335 SequenceSize += NumBytesToCheckLRInTCEpilogue;
8336
 8337    // We have to check if sp modifying instructions would get outlined.
 8338    // If so, we only allow outlining if sp is unchanged overall, so matching
 8339    // sub and add instructions are okay to outline; all other sp modifications
 8340    // are not.
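    // For example, a candidate containing "sub sp, sp, #16 ... add sp, sp, #16"
    // nets out to zero and can still be outlined, while an unmatched
    // "add sp, sp, #16" (or any non-ADD/SUB write to sp) disqualifies the
    // candidate.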
8341 auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
8342 int SPValue = 0;
8343 for (auto &MI : C) {
8344 if (MI.modifiesRegister(AArch64::SP, &TRI)) {
8345 switch (MI.getOpcode()) {
8346 case AArch64::ADDXri:
8347 case AArch64::ADDWri:
8348 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
8349 assert(MI.getOperand(2).isImm() &&
8350 "Expected operand to be immediate");
8351 assert(MI.getOperand(1).isReg() &&
8352 "Expected operand to be a register");
8353 // Check if the add just increments sp. If so, we search for
8354 // matching sub instructions that decrement sp. If not, the
8355 // modification is illegal
8356 if (MI.getOperand(1).getReg() == AArch64::SP)
8357 SPValue += MI.getOperand(2).getImm();
8358 else
8359 return true;
8360 break;
8361 case AArch64::SUBXri:
8362 case AArch64::SUBWri:
8363 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
8364 assert(MI.getOperand(2).isImm() &&
8365 "Expected operand to be immediate");
8366 assert(MI.getOperand(1).isReg() &&
8367 "Expected operand to be a register");
8368 // Check if the sub just decrements sp. If so, we search for
8369 // matching add instructions that increment sp. If not, the
8370 // modification is illegal
8371 if (MI.getOperand(1).getReg() == AArch64::SP)
8372 SPValue -= MI.getOperand(2).getImm();
8373 else
8374 return true;
8375 break;
8376 default:
8377 return true;
8378 }
8379 }
8380 }
8381 if (SPValue)
8382 return true;
8383 return false;
8384 };
8385 // Remove candidates with illegal stack modifying instructions
8386 llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
8387
8388 // If the sequence doesn't have enough candidates left, then we're done.
8389 if (RepeatedSequenceLocs.size() < 2)
8390 return std::nullopt;
8391 }
8392
8393 // Properties about candidate MBBs that hold for all of them.
8394 unsigned FlagsSetInAll = 0xF;
8395
8396 // Compute liveness information for each candidate, and set FlagsSetInAll.
8397 for (outliner::Candidate &C : RepeatedSequenceLocs)
8398 FlagsSetInAll &= C.Flags;
8399
8400 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
8401
8402 // Helper lambda which sets call information for every candidate.
8403 auto SetCandidateCallInfo =
8404 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
8405 for (outliner::Candidate &C : RepeatedSequenceLocs)
8406 C.setCallInfo(CallID, NumBytesForCall);
8407 };
8408
8409 unsigned FrameID = MachineOutlinerDefault;
8410 NumBytesToCreateFrame += 4;
8411
8412 bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
8413 return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
8414 });
8415
8416 // We check to see if CFI Instructions are present, and if they are
8417 // we find the number of CFI Instructions in the candidates.
8418 unsigned CFICount = 0;
8419 for (auto &I : RepeatedSequenceLocs[0]) {
8420 if (I.isCFIInstruction())
8421 CFICount++;
8422 }
8423
8424 // We compare the number of found CFI Instructions to the number of CFI
8425 // instructions in the parent function for each candidate. We must check this
8426 // since if we outline one of the CFI instructions in a function, we have to
8427 // outline them all for correctness. If we do not, the address offsets will be
8428 // incorrect between the two sections of the program.
8429 for (outliner::Candidate &C : RepeatedSequenceLocs) {
8430 std::vector<MCCFIInstruction> CFIInstructions =
8431 C.getMF()->getFrameInstructions();
8432
8433 if (CFICount > 0 && CFICount != CFIInstructions.size())
8434 return std::nullopt;
8435 }
8436
 8437  // Returns true if an instruction is safe to fix up, false otherwise.
8438 auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
8439 if (MI.isCall())
8440 return true;
8441
8442 if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
8443 !MI.readsRegister(AArch64::SP, &TRI))
8444 return true;
8445
8446 // Any modification of SP will break our code to save/restore LR.
8447 // FIXME: We could handle some instructions which add a constant
8448 // offset to SP, with a bit more work.
8449 if (MI.modifiesRegister(AArch64::SP, &TRI))
8450 return false;
8451
8452 // At this point, we have a stack instruction that we might need to
8453 // fix up. We'll handle it if it's a load or store.
8454 if (MI.mayLoadOrStore()) {
8455 const MachineOperand *Base; // Filled with the base operand of MI.
8456 int64_t Offset; // Filled with the offset of MI.
8457 bool OffsetIsScalable;
8458
8459 // Does it allow us to offset the base operand and is the base the
8460 // register SP?
8461 if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
8462 !Base->isReg() || Base->getReg() != AArch64::SP)
8463 return false;
8464
 8465      // Fix-up code below assumes bytes.
8466 if (OffsetIsScalable)
8467 return false;
8468
8469 // Find the minimum/maximum offset for this instruction and check
8470 // if fixing it up would be in range.
8471 int64_t MinOffset,
8472 MaxOffset; // Unscaled offsets for the instruction.
8473 // The scale to multiply the offsets by.
8474 TypeSize Scale(0U, false), DummyWidth(0U, false);
8475 getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
8476
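      // Outlining will spill LR in a 16-byte slot below the original SP, so an
      // access at [sp, #N] in the candidate becomes [sp, #N+16] in the outlined
      // function; make sure the adjusted offset is still encodable.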
8477 Offset += 16; // Update the offset to what it would be if we outlined.
8478 if (Offset < MinOffset * (int64_t)Scale.getFixedValue() ||
8479 Offset > MaxOffset * (int64_t)Scale.getFixedValue())
8480 return false;
8481
8482 // It's in range, so we can outline it.
8483 return true;
8484 }
8485
8486 // FIXME: Add handling for instructions like "add x0, sp, #8".
8487
8488 // We can't fix it up, so don't outline it.
8489 return false;
8490 };
8491
8492 // True if it's possible to fix up each stack instruction in this sequence.
8493 // Important for frames/call variants that modify the stack.
8494 bool AllStackInstrsSafe =
8495 llvm::all_of(RepeatedSequenceLocs[0], IsSafeToFixup);
8496
8497 // If the last instruction in any candidate is a terminator, then we should
8498 // tail call all of the candidates.
8499 if (RepeatedSequenceLocs[0].back().isTerminator()) {
8500 FrameID = MachineOutlinerTailCall;
8501 NumBytesToCreateFrame = 0;
8502 unsigned NumBytesForCall = 4 + NumBytesToCheckLRInTCEpilogue;
8503 SetCandidateCallInfo(MachineOutlinerTailCall, NumBytesForCall);
8504 }
8505
8506 else if (LastInstrOpcode == AArch64::BL ||
8507 ((LastInstrOpcode == AArch64::BLR ||
8508 LastInstrOpcode == AArch64::BLRNoIP) &&
8509 !HasBTI)) {
8510 // FIXME: Do we need to check if the code after this uses the value of LR?
8511 FrameID = MachineOutlinerThunk;
8512 NumBytesToCreateFrame = NumBytesToCheckLRInTCEpilogue;
8513 SetCandidateCallInfo(MachineOutlinerThunk, 4);
8514 }
8515
8516 else {
8517 // We need to decide how to emit calls + frames. We can always emit the same
8518 // frame if we don't need to save to the stack. If we have to save to the
8519 // stack, then we need a different frame.
8520 unsigned NumBytesNoStackCalls = 0;
8521 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
8522
8523 // Check if we have to save LR.
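    // The costs recorded below are bytes of call overhead at each call site: a
    // bare BL is 4 bytes, while saving and restoring LR (whether to a register
    // or to the stack) adds one instruction on each side of the BL, for 12
    // bytes total.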
8524 for (outliner::Candidate &C : RepeatedSequenceLocs) {
8525 bool LRAvailable =
8526 (C.Flags & MachineOutlinerMBBFlags::LRUnavailableSomewhere)
8527 ? C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI)
8528 : true;
8529 // If we have a noreturn caller, then we're going to be conservative and
8530 // say that we have to save LR. If we don't have a ret at the end of the
8531 // block, then we can't reason about liveness accurately.
8532 //
8533 // FIXME: We can probably do better than always disabling this in
8534 // noreturn functions by fixing up the liveness info.
8535 bool IsNoReturn =
8536 C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
8537
8538 // Is LR available? If so, we don't need a save.
8539 if (LRAvailable && !IsNoReturn) {
8540 NumBytesNoStackCalls += 4;
8541 C.setCallInfo(MachineOutlinerNoLRSave, 4);
8542 CandidatesWithoutStackFixups.push_back(C);
8543 }
8544
8545 // Is an unused register available? If so, we won't modify the stack, so
8546 // we can outline with the same frame type as those that don't save LR.
8547 else if (findRegisterToSaveLRTo(C)) {
8548 NumBytesNoStackCalls += 12;
8549 C.setCallInfo(MachineOutlinerRegSave, 12);
8550 CandidatesWithoutStackFixups.push_back(C);
8551 }
8552
8553 // Is SP used in the sequence at all? If not, we don't have to modify
8554 // the stack, so we are guaranteed to get the same frame.
8555 else if (C.isAvailableInsideSeq(AArch64::SP, TRI)) {
8556 NumBytesNoStackCalls += 12;
8557 C.setCallInfo(MachineOutlinerDefault, 12);
8558 CandidatesWithoutStackFixups.push_back(C);
8559 }
8560
8561 // If we outline this, we need to modify the stack. Pretend we don't
8562 // outline this by saving all of its bytes.
8563 else {
8564 NumBytesNoStackCalls += SequenceSize;
8565 }
8566 }
8567
8568 // If there are no places where we have to save LR, then note that we
8569 // don't have to update the stack. Otherwise, give every candidate the
8570 // default call type, as long as it's safe to do so.
8571 if (!AllStackInstrsSafe ||
8572 NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
8573 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
8574 FrameID = MachineOutlinerNoLRSave;
8575 if (RepeatedSequenceLocs.size() < 2)
8576 return std::nullopt;
8577 } else {
8578 SetCandidateCallInfo(MachineOutlinerDefault, 12);
8579
8580 // Bugzilla ID: 46767
8581 // TODO: Check if fixing up the stack more than once is safe so we can
8582 // outline these.
8583 //
8584 // An outline resulting in a caller that requires stack fixups at the
8585 // callsite to a callee that also requires stack fixups can happen when
8586 // there are no available registers at the candidate callsite for a
8587 // candidate that itself also has calls.
8588 //
8589 // In other words if function_containing_sequence in the following pseudo
8590 // assembly requires that we save LR at the point of the call, but there
8591 // are no available registers: in this case we save using SP and as a
 8592      // result the SP offsets require stack fixups by multiples of 16.
8593 //
8594 // function_containing_sequence:
8595 // ...
8596 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
8597 // call OUTLINED_FUNCTION_N
8598 // restore LR from SP
8599 // ...
8600 //
8601 // OUTLINED_FUNCTION_N:
8602 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
8603 // ...
8604 // bl foo
8605 // restore LR from SP
8606 // ret
8607 //
8608 // Because the code to handle more than one stack fixup does not
8609 // currently have the proper checks for legality, these cases will assert
8610 // in the AArch64 MachineOutliner. This is because the code to do this
8611 // needs more hardening, testing, better checks that generated code is
8612 // legal, etc and because it is only verified to handle a single pass of
8613 // stack fixup.
8614 //
8615 // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
8616 // these cases until they are known to be handled. Bugzilla 46767 is
8617 // referenced in comments at the assert site.
8618 //
8619 // To avoid asserting (or generating non-legal code on noassert builds)
8620 // we remove all candidates which would need more than one stack fixup by
8621 // pruning the cases where the candidate has calls while also having no
8622 // available LR and having no available general purpose registers to copy
8623 // LR to (ie one extra stack save/restore).
8624 //
8625 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
8626 erase_if(RepeatedSequenceLocs, [this, &TRI](outliner::Candidate &C) {
8627 auto IsCall = [](const MachineInstr &MI) { return MI.isCall(); };
8628 return (llvm::any_of(C, IsCall)) &&
8629 (!C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) ||
8630 !findRegisterToSaveLRTo(C));
8631 });
8632 }
8633 }
8634
8635 // If we dropped all of the candidates, bail out here.
8636 if (RepeatedSequenceLocs.size() < 2) {
8637 RepeatedSequenceLocs.clear();
8638 return std::nullopt;
8639 }
8640 }
8641
8642 // Does every candidate's MBB contain a call? If so, then we might have a call
8643 // in the range.
8644 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
8645 // Check if the range contains a call. These require a save + restore of the
8646 // link register.
8647 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
8648 bool ModStackToSaveLR = false;
8649 if (std::any_of(FirstCand.begin(), std::prev(FirstCand.end()),
8650 [](const MachineInstr &MI) { return MI.isCall(); }))
8651 ModStackToSaveLR = true;
8652
8653 // Handle the last instruction separately. If this is a tail call, then the
8654 // last instruction is a call. We don't want to save + restore in this case.
8655 // However, it could be possible that the last instruction is a call without
8656 // it being valid to tail call this sequence. We should consider this as
8657 // well.
8658 else if (FrameID != MachineOutlinerThunk &&
8659 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
8660 ModStackToSaveLR = true;
8661
8662 if (ModStackToSaveLR) {
8663 // We can't fix up the stack. Bail out.
8664 if (!AllStackInstrsSafe) {
8665 RepeatedSequenceLocs.clear();
8666 return std::nullopt;
8667 }
8668
8669 // Save + restore LR.
8670 NumBytesToCreateFrame += 8;
8671 }
8672 }
8673
8674 // If we have CFI instructions, we can only outline if the outlined section
8675 // can be a tail call
8676 if (FrameID != MachineOutlinerTailCall && CFICount > 0)
8677 return std::nullopt;
8678
8679 return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
8680 NumBytesToCreateFrame, FrameID);
8681}
8682
8683void AArch64InstrInfo::mergeOutliningCandidateAttributes(
 8684    Function &F, std::vector<outliner::Candidate> &Candidates) const {
 8685  // If a bunch of candidates reach this point, they must agree on their return
 8686  // address signing. It is therefore enough to just consider the signing
 8687  // behaviour of one of them.
8688 const auto &CFn = Candidates.front().getMF()->getFunction();
8689
8690 // Since all candidates belong to the same module, just copy the
8691 // function-level attributes of an arbitrary function.
8692 if (CFn.hasFnAttribute("sign-return-address"))
8693 F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
8694 if (CFn.hasFnAttribute("sign-return-address-key"))
8695 F.addFnAttr(CFn.getFnAttribute("sign-return-address-key"));
8696
8697 AArch64GenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
8698}
8699
8700bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
 8701    MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
8702 const Function &F = MF.getFunction();
8703
8704 // Can F be deduplicated by the linker? If it can, don't outline from it.
8705 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
8706 return false;
8707
8708 // Don't outline from functions with section markings; the program could
8709 // expect that all the code is in the named section.
8710 // FIXME: Allow outlining from multiple functions with the same section
8711 // marking.
8712 if (F.hasSection())
8713 return false;
8714
8715 // Outlining from functions with redzones is unsafe since the outliner may
8716 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
8717 // outline from it.
 8718  AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
 8719  if (!AFI || AFI->hasRedZone().value_or(true))
8720 return false;
8721
8722 // FIXME: Determine whether it is safe to outline from functions which contain
8723 // streaming-mode changes. We may need to ensure any smstart/smstop pairs are
8724 // outlined together and ensure it is safe to outline with async unwind info,
8725 // required for saving & restoring VG around calls.
8726 if (AFI->hasStreamingModeChanges())
8727 return false;
8728
8729 // FIXME: Teach the outliner to generate/handle Windows unwind info.
 8730  if (MF.getTarget().getMCAsmInfo()->usesWindowsCFI())
 8731    return false;
8732
8733 // It's safe to outline from MF.
8734 return true;
8735}
8736
8739 unsigned &Flags) const {
8741 "Must track liveness!");
8743 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
8744 Ranges;
8745 // According to the AArch64 Procedure Call Standard, the following are
8746 // undefined on entry/exit from a function call:
8747 //
8748 // * Registers x16, x17, (and thus w16, w17)
8749 // * Condition codes (and thus the NZCV register)
8750 //
8751 // If any of these registers are used inside or live across an outlined
8752 // function, then they may be modified later, either by the compiler or
8753 // some other tool (like the linker).
8754 //
8755 // To avoid outlining in these situations, partition each block into ranges
8756 // where these registers are dead. We will only outline from those ranges.
 8757  LiveRegUnits LRU(getRegisterInfo());
 8758  auto AreAllUnsafeRegsDead = [&LRU]() {
8759 return LRU.available(AArch64::W16) && LRU.available(AArch64::W17) &&
8760 LRU.available(AArch64::NZCV);
8761 };
8762
8763 // We need to know if LR is live across an outlining boundary later on in
8764 // order to decide how we'll create the outlined call, frame, etc.
8765 //
8766 // It's pretty expensive to check this for *every candidate* within a block.
8767 // That's some potentially n^2 behaviour, since in the worst case, we'd need
8768 // to compute liveness from the end of the block for O(n) candidates within
8769 // the block.
8770 //
8771 // So, to improve the average case, let's keep track of liveness from the end
8772 // of the block to the beginning of *every outlinable range*. If we know that
8773 // LR is available in every range we could outline from, then we know that
8774 // we don't need to check liveness for any candidate within that range.
8775 bool LRAvailableEverywhere = true;
8776 // Compute liveness bottom-up.
8777 LRU.addLiveOuts(MBB);
8778 // Update flags that require info about the entire MBB.
8779 auto UpdateWholeMBBFlags = [&Flags](const MachineInstr &MI) {
8780 if (MI.isCall() && !MI.isTerminator())
8781 Flags |= MachineOutlinerMBBFlags::HasCalls;
8782 };
8783 // Range: [RangeBegin, RangeEnd)
8784 MachineBasicBlock::instr_iterator RangeBegin, RangeEnd;
8785 unsigned RangeLen;
8786 auto CreateNewRangeStartingAt =
8787 [&RangeBegin, &RangeEnd,
8788 &RangeLen](MachineBasicBlock::instr_iterator NewBegin) {
8789 RangeBegin = NewBegin;
8790 RangeEnd = std::next(RangeBegin);
8791 RangeLen = 0;
8792 };
8793 auto SaveRangeIfNonEmpty = [&RangeLen, &Ranges, &RangeBegin, &RangeEnd]() {
8794 // At least one unsafe register is not dead. We do not want to outline at
8795 // this point. If it is long enough to outline from, save the range
8796 // [RangeBegin, RangeEnd).
8797 if (RangeLen > 1)
8798 Ranges.push_back(std::make_pair(RangeBegin, RangeEnd));
8799 };
8800 // Find the first point where all unsafe registers are dead.
8801 // FIND: <safe instr> <-- end of first potential range
8802 // SKIP: <unsafe def>
8803 // SKIP: ... everything between ...
8804 // SKIP: <unsafe use>
8805 auto FirstPossibleEndPt = MBB.instr_rbegin();
8806 for (; FirstPossibleEndPt != MBB.instr_rend(); ++FirstPossibleEndPt) {
8807 LRU.stepBackward(*FirstPossibleEndPt);
8808 // Update flags that impact how we outline across the entire block,
8809 // regardless of safety.
8810 UpdateWholeMBBFlags(*FirstPossibleEndPt);
8811 if (AreAllUnsafeRegsDead())
8812 break;
8813 }
8814 // If we exhausted the entire block, we have no safe ranges to outline.
8815 if (FirstPossibleEndPt == MBB.instr_rend())
8816 return Ranges;
8817 // Current range.
8818 CreateNewRangeStartingAt(FirstPossibleEndPt->getIterator());
8819 // StartPt points to the first place where all unsafe registers
8820 // are dead (if there is any such point). Begin partitioning the MBB into
8821 // ranges.
8822 for (auto &MI : make_range(FirstPossibleEndPt, MBB.instr_rend())) {
8823 LRU.stepBackward(MI);
8824 UpdateWholeMBBFlags(MI);
8825 if (!AreAllUnsafeRegsDead()) {
8826 SaveRangeIfNonEmpty();
8827 CreateNewRangeStartingAt(MI.getIterator());
8828 continue;
8829 }
8830 LRAvailableEverywhere &= LRU.available(AArch64::LR);
8831 RangeBegin = MI.getIterator();
8832 ++RangeLen;
8833 }
8834 // Above loop misses the last (or only) range. If we are still safe, then
8835 // let's save the range.
8836 if (AreAllUnsafeRegsDead())
8837 SaveRangeIfNonEmpty();
8838 if (Ranges.empty())
8839 return Ranges;
 8840  // We found the ranges bottom-up, but the mapping expects them top-down.
 8841  // Reverse the order.
8842 std::reverse(Ranges.begin(), Ranges.end());
8843 // If there is at least one outlinable range where LR is unavailable
8844 // somewhere, remember that.
8845 if (!LRAvailableEverywhere)
8846 Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
8847 return Ranges;
8848}
8849
8850outliner::InstrType
8851AArch64InstrInfo::getOutliningTypeImpl(MachineBasicBlock::iterator &MIT,
 8852                                       unsigned Flags) const {
8853 MachineInstr &MI = *MIT;
8854 MachineBasicBlock *MBB = MI.getParent();
8855 MachineFunction *MF = MBB->getParent();
 8856  AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
8857
8858 // Don't outline anything used for return address signing. The outlined
8859 // function will get signed later if needed
8860 switch (MI.getOpcode()) {
8861 case AArch64::PACM:
8862 case AArch64::PACIASP:
8863 case AArch64::PACIBSP:
8864 case AArch64::PACIASPPC:
8865 case AArch64::PACIBSPPC:
8866 case AArch64::AUTIASP:
8867 case AArch64::AUTIBSP:
8868 case AArch64::AUTIASPPCi:
8869 case AArch64::AUTIASPPCr:
8870 case AArch64::AUTIBSPPCi:
8871 case AArch64::AUTIBSPPCr:
8872 case AArch64::RETAA:
8873 case AArch64::RETAB:
8874 case AArch64::RETAASPPCi:
8875 case AArch64::RETAASPPCr:
8876 case AArch64::RETABSPPCi:
8877 case AArch64::RETABSPPCr:
8878 case AArch64::EMITBKEY:
8879 case AArch64::PAUTH_PROLOGUE:
8880 case AArch64::PAUTH_EPILOGUE:
 8881    return outliner::InstrType::Illegal;
 8882  }
8883
8884 // Don't outline LOHs.
8885 if (FuncInfo->getLOHRelated().count(&MI))
 8886    return outliner::InstrType::Illegal;
8887
8888 // We can only outline these if we will tail call the outlined function, or
8889 // fix up the CFI offsets. Currently, CFI instructions are outlined only if
8890 // in a tail call.
8891 //
8892 // FIXME: If the proper fixups for the offset are implemented, this should be
8893 // possible.
8894 if (MI.isCFIInstruction())
 8895    return outliner::InstrType::Legal;
8896
8897 // Is this a terminator for a basic block?
8898 if (MI.isTerminator())
8899 // TargetInstrInfo::getOutliningType has already filtered out anything
8900 // that would break this, so we can allow it here.
 8901    return outliner::InstrType::Legal;
8902
8903 // Make sure none of the operands are un-outlinable.
8904 for (const MachineOperand &MOP : MI.operands()) {
8905 // A check preventing CFI indices was here before, but only CFI
8906 // instructions should have those.
8907 assert(!MOP.isCFIIndex());
8908
8909 // If it uses LR or W30 explicitly, then don't touch it.
8910 if (MOP.isReg() && !MOP.isImplicit() &&
8911 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
 8912      return outliner::InstrType::Illegal;
 8913  }
8914
8915 // Special cases for instructions that can always be outlined, but will fail
 8916  // the later tests, e.g. ADRPs, which are PC-relative and use LR, but can always
8917 // be outlined because they don't require a *specific* value to be in LR.
8918 if (MI.getOpcode() == AArch64::ADRP)
 8919    return outliner::InstrType::Legal;
8920
8921 // If MI is a call we might be able to outline it. We don't want to outline
8922 // any calls that rely on the position of items on the stack. When we outline
8923 // something containing a call, we have to emit a save and restore of LR in
8924 // the outlined function. Currently, this always happens by saving LR to the
8925 // stack. Thus, if we outline, say, half the parameters for a function call
8926 // plus the call, then we'll break the callee's expectations for the layout
8927 // of the stack.
8928 //
8929 // FIXME: Allow calls to functions which construct a stack frame, as long
8930 // as they don't access arguments on the stack.
8931 // FIXME: Figure out some way to analyze functions defined in other modules.
8932 // We should be able to compute the memory usage based on the IR calling
8933 // convention, even if we can't see the definition.
8934 if (MI.isCall()) {
8935 // Get the function associated with the call. Look at each operand and find
8936 // the one that represents the callee and get its name.
8937 const Function *Callee = nullptr;
8938 for (const MachineOperand &MOP : MI.operands()) {
8939 if (MOP.isGlobal()) {
8940 Callee = dyn_cast<Function>(MOP.getGlobal());
8941 break;
8942 }
8943 }
8944
8945 // Never outline calls to mcount. There isn't any rule that would require
8946 // this, but the Linux kernel's "ftrace" feature depends on it.
8947 if (Callee && Callee->getName() == "\01_mcount")
 8948      return outliner::InstrType::Illegal;
8949
8950 // If we don't know anything about the callee, assume it depends on the
8951 // stack layout of the caller. In that case, it's only legal to outline
8952 // as a tail-call. Explicitly list the call instructions we know about so we
8953 // don't get unexpected results with call pseudo-instructions.
8954 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
8955 if (MI.getOpcode() == AArch64::BLR ||
8956 MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
8957 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
8958
8959 if (!Callee)
8960 return UnknownCallOutlineType;
8961
 8962    // We have a function we have information about. Check if it's something we
 8963    // can safely outline.
8964 MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
8965
8966 // We don't know what's going on with the callee at all. Don't touch it.
8967 if (!CalleeMF)
8968 return UnknownCallOutlineType;
8969
8970 // Check if we know anything about the callee saves on the function. If we
8971 // don't, then don't touch it, since that implies that we haven't
8972 // computed anything about its stack frame yet.
8973 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
8974 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
8975 MFI.getNumObjects() > 0)
8976 return UnknownCallOutlineType;
8977
8978 // At this point, we can say that CalleeMF ought not to pass anything on
8979 // the stack. Therefore, we can outline it.
8980 return outliner::InstrType::Legal;
8981 }
8982
8983 // Don't touch the link register or W30.
8984 if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
8985 MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
8986 return outliner::InstrType::Illegal;
8987
8988 // Don't outline BTI instructions, because that will prevent the outlining
8989 // site from being indirectly callable.
8990 if (hasBTISemantics(MI))
8991 return outliner::InstrType::Illegal;
8992
8993 return outliner::InstrType::Legal;
8994}
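// Summary of the classification above, for illustration: CFI directives,
// terminators, and ADRPs are Legal; explicit uses of LR/W30 and BTI-bearing
// instructions are Illegal; calls are Legal only when the callee is known not
// to touch the stack, and an otherwise-unknown BL/BLR/BLRNoIP becomes
// LegalTerminator, i.e. outlinable only as a tail call.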
8995
8996void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
8997 for (MachineInstr &MI : MBB) {
8998 const MachineOperand *Base;
8999 TypeSize Width(0, false);
9000 int64_t Offset;
9001 bool OffsetIsScalable;
9002
9003 // Is this a load or store with an immediate offset with SP as the base?
9004 if (!MI.mayLoadOrStore() ||
9005 !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
9006 &RI) ||
9007 (Base->isReg() && Base->getReg() != AArch64::SP))
9008 continue;
9009
9010 // It is, so we have to fix it up.
9011 TypeSize Scale(0U, false);
9012 int64_t Dummy1, Dummy2;
9013
9015 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
9016 getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
9017 assert(Scale != 0 && "Unexpected opcode!");
9018 assert(!OffsetIsScalable && "Expected offset to be a byte offset");
9019
9020 // We've pushed the return address to the stack, so add 16 to the offset.
9021 // This is safe, since we already checked if it would overflow when we
9022 // checked if this instruction was legal to outline.
9023 int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedValue();
9024 StackOffsetOperand.setImm(NewImm);
9025 }
9026}
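// Worked example of the rewrite above (illustrative): for an LDRXui such as
// "ldr x0, [sp, #8]" the reported byte offset is 8 and the scale is 8, so the
// new immediate is (8 + 16) / 8 = 3 and the access becomes "ldr x0, [sp, #24]",
// skipping over the 16 bytes used to spill LR.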
9027
9028static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
9029 const AArch64InstrInfo *TII,
9030 bool ShouldSignReturnAddr) {
9031 if (!ShouldSignReturnAddr)
9032 return;
9033
9034 BuildMI(MBB, MBB.begin(), DebugLoc(), TII->get(AArch64::PAUTH_PROLOGUE))
9035 .setMIFlag(MachineInstr::FrameSetup);
9036 BuildMI(MBB, MBB.getFirstInstrTerminator(), DebugLoc(),
9037 TII->get(AArch64::PAUTH_EPILOGUE))
9038 .setMIFlag(MachineInstr::FrameDestroy);
9039}
9040
9041void AArch64InstrInfo::buildOutlinedFrame(
9042 MachineBasicBlock &MBB, MachineFunction &MF,
9043 const outliner::OutlinedFunction &OF) const {
9044
9045 AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>();
9046
9047 if (OF.FrameConstructionID == MachineOutlinerTailCall)
9048 FI->setOutliningStyle("Tail Call");
9049 else if (OF.FrameConstructionID == MachineOutlinerThunk) {
9050 // For thunk outlining, rewrite the last instruction from a call to a
9051 // tail-call.
9052 MachineInstr *Call = &*--MBB.instr_end();
9053 unsigned TailOpcode;
9054 if (Call->getOpcode() == AArch64::BL) {
9055 TailOpcode = AArch64::TCRETURNdi;
9056 } else {
9057 assert(Call->getOpcode() == AArch64::BLR ||
9058 Call->getOpcode() == AArch64::BLRNoIP);
9059 TailOpcode = AArch64::TCRETURNriALL;
9060 }
9061 MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
9062 .add(Call->getOperand(0))
9063 .addImm(0);
9064 MBB.insert(MBB.end(), TC);
9065 Call->eraseFromParent();
9066
9067 FI->setOutliningStyle("Thunk");
9068 }
9069
9070 bool IsLeafFunction = true;
9071
9072 // Is there a call in the outlined range?
9073 auto IsNonTailCall = [](const MachineInstr &MI) {
9074 return MI.isCall() && !MI.isReturn();
9075 };
9076
9077 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
9078 // Fix up the instructions in the range, since we're going to modify the
9079 // stack.
9080
9081 // Bugzilla ID: 46767
9082 // TODO: Check if fixing up twice is safe so we can outline these.
9083 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
9084 "Can only fix up stack references once");
9085 fixupPostOutline(MBB);
9086
9087 IsLeafFunction = false;
9088
9089 // LR has to be a live in so that we can save it.
9090 if (!MBB.isLiveIn(AArch64::LR))
9091 MBB.addLiveIn(AArch64::LR);
9092
9093 MachineBasicBlock::iterator It = MBB.begin();
9094 MachineBasicBlock::iterator Et = MBB.end();
9095
9096 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
9097 OF.FrameConstructionID == MachineOutlinerThunk)
9098 Et = std::prev(MBB.end());
9099
9100 // Insert a save before the outlined region
9101 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
9102 .addReg(AArch64::SP, RegState::Define)
9103 .addReg(AArch64::LR)
9104 .addReg(AArch64::SP)
9105 .addImm(-16);
9106 It = MBB.insert(It, STRXpre);
9107
9108 if (MF.getInfo<AArch64FunctionInfo>()->needsDwarfUnwindInfo(MF)) {
9109 const TargetSubtargetInfo &STI = MF.getSubtarget();
9110 const MCRegisterInfo *MRI = STI.getRegisterInfo();
9111 unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
9112
9113 // Add a CFI saying the stack was moved 16 B down.
9114 int64_t StackPosEntry =
9115 MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, 16));
9116 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
9117 .addCFIIndex(StackPosEntry)
9118 .setMIFlags(MachineInstr::FrameSetup);
9119
9120 // Add a CFI saying that the LR that we want to find is now 16 B higher
9121 // than before.
9122 int64_t LRPosEntry = MF.addFrameInst(
9123 MCCFIInstruction::createOffset(nullptr, DwarfReg, -16));
9124 BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
9125 .addCFIIndex(LRPosEntry)
9126 .setMIFlags(MachineInstr::FrameSetup);
9127 }
9128
9129 // Insert a restore before the terminator for the function.
9130 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
9131 .addReg(AArch64::SP, RegState::Define)
9132 .addReg(AArch64::LR, RegState::Define)
9133 .addReg(AArch64::SP)
9134 .addImm(16);
9135 Et = MBB.insert(Et, LDRXpost);
9136 }
9137
9138 bool ShouldSignReturnAddr = FI->shouldSignReturnAddress(!IsLeafFunction);
9139
9140 // If this is a tail call outlined function, then there's already a return.
9141 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
9142 OF.FrameConstructionID == MachineOutlinerThunk) {
9143 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
9144 return;
9145 }
9146
9147 // It's not a tail call, so we have to insert the return ourselves.
9148
9149 // LR has to be a live in so that we can return to it.
9150 if (!MBB.isLiveIn(AArch64::LR))
9151 MBB.addLiveIn(AArch64::LR);
9152
9153 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
9154 .addReg(AArch64::LR);
9155 MBB.insert(MBB.end(), ret);
9156
9157 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
9158
9159 FI->setOutliningStyle("Function");
9160
9161 // Did we have to modify the stack by saving the link register?
9162 if (OF.FrameConstructionID != MachineOutlinerDefault)
9163 return;
9164
9165 // We modified the stack.
9166 // Walk over the basic block and fix up all the stack accesses.
9167 fixupPostOutline(MBB);
9168}
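// Rough sketch of the frame built above for the default (non-tail-call,
// non-thunk) case, with LR saved on the stack:
//   str x30, [sp, #-16]!    ; STRXpre inserted before the outlined body
//   ...outlined instructions, SP offsets fixed up by fixupPostOutline...
//   ldr x30, [sp], #16      ; LDRXpost inserted before the terminator
//   ret                     ; RET appended because there is no tail call
// plus CFI for the 16-byte SP adjustment when DWARF unwind info is needed and
// PAUTH prologue/epilogue markers when return-address signing is requested.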
9169
9170MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
9171 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
9172 MachineFunction &MF, outliner::Candidate &C) const {
9173
9174 // Are we tail calling?
9175 if (C.CallConstructionID == MachineOutlinerTailCall) {
9176 // If yes, then we can just branch to the label.
9177 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
9178 .addGlobalAddress(M.getNamedValue(MF.getName()))
9179 .addImm(0));
9180 return It;
9181 }
9182
9183 // Are we saving the link register?
9184 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
9185 C.CallConstructionID == MachineOutlinerThunk) {
9186 // No, so just insert the call.
9187 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
9188 .addGlobalAddress(M.getNamedValue(MF.getName())));
9189 return It;
9190 }
9191
9192 // We want to return the spot where we inserted the call.
9193 MachineBasicBlock::iterator CallPt;
9194
9195 // Instructions for saving and restoring LR around the call instruction we're
9196 // going to insert.
9197 MachineInstr *Save;
9198 MachineInstr *Restore;
9199 // Can we save to a register?
9200 if (C.CallConstructionID == MachineOutlinerRegSave) {
9201 // FIXME: This logic should be sunk into a target-specific interface so that
9202 // we don't have to recompute the register.
9203 Register Reg = findRegisterToSaveLRTo(C);
9204 assert(Reg && "No callee-saved register available?");
9205
9206 // LR has to be a live in so that we can save it.
9207 if (!MBB.isLiveIn(AArch64::LR))
9208 MBB.addLiveIn(AArch64::LR);
9209
9210 // Save and restore LR from Reg.
9211 Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
9212 .addReg(AArch64::XZR)
9213 .addReg(AArch64::LR)
9214 .addImm(0);
9215 Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
9216 .addReg(AArch64::XZR)
9217 .addReg(Reg)
9218 .addImm(0);
9219 } else {
9220 // We have the default case. Save and restore from SP.
9221 Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
9222 .addReg(AArch64::SP, RegState::Define)
9223 .addReg(AArch64::LR)
9224 .addReg(AArch64::SP)
9225 .addImm(-16);
9226 Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
9227 .addReg(AArch64::SP, RegState::Define)
9228 .addReg(AArch64::LR, RegState::Define)
9229 .addReg(AArch64::SP)
9230 .addImm(16);
9231 }
9232
9233 It = MBB.insert(It, Save);
9234 It++;
9235
9236 // Insert the call.
9237 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
9238 .addGlobalAddress(M.getNamedValue(MF.getName())));
9239 CallPt = It;
9240 It++;
9241
9242 It = MBB.insert(It, Restore);
9243 return CallPt;
9244}
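// The call sequences produced above, roughly, by CallConstructionID:
//   MachineOutlinerTailCall:          b   OUTLINED_FUNCTION
//   MachineOutlinerNoLRSave / Thunk:  bl  OUTLINED_FUNCTION
//   MachineOutlinerRegSave:           mov xN, lr ; bl ... ; mov lr, xN
//   MachineOutlinerDefault:           str lr, [sp, #-16]! ; bl ... ;
//                                     ldr lr, [sp], #16
// where OUTLINED_FUNCTION stands for the named outlined function and xN for
// the callee-saved register picked by findRegisterToSaveLRTo.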
9245
9247 MachineFunction &MF) const {
9248 return MF.getFunction().hasMinSize();
9249}
9250
9251void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
9252 MachineBasicBlock::iterator Iter,
9253 DebugLoc &DL,
9254 bool AllowSideEffects) const {
9255 const MachineFunction &MF = *MBB.getParent();
9256 const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
9257 const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
9258
9259 if (TRI.isGeneralPurposeRegister(MF, Reg)) {
9260 BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0);
9261 } else if (STI.hasSVE()) {
9262 BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg)
9263 .addImm(0)
9264 .addImm(0);
9265 } else {
9266 BuildMI(MBB, Iter, DL, get(AArch64::MOVIv2d_ns), Reg)
9267 .addImm(0);
9268 }
9269}
9270
9271std::optional<DestSourcePair>
9272AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
9273
9274 // AArch64::ORRWrs and AArch64::ORRXrs with WZR/XZR reg
9275 // and zero immediate operands used as an alias for mov instruction.
9276 if (MI.getOpcode() == AArch64::ORRWrs &&
9277 MI.getOperand(1).getReg() == AArch64::WZR &&
9278 MI.getOperand(3).getImm() == 0x0 &&
9279 // Check that the w->w move is not a zero-extending w->x mov.
9280 (!MI.getOperand(0).getReg().isVirtual() ||
9281 MI.getOperand(0).getSubReg() == 0) &&
9282 (!MI.getOperand(0).getReg().isPhysical() ||
9283 MI.findRegisterDefOperandIdx(MI.getOperand(0).getReg() - AArch64::W0 +
9284 AArch64::X0,
9285 /*TRI=*/nullptr) == -1))
9286 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
9287
9288 if (MI.getOpcode() == AArch64::ORRXrs &&
9289 MI.getOperand(1).getReg() == AArch64::XZR &&
9290 MI.getOperand(3).getImm() == 0x0)
9291 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
9292
9293 return std::nullopt;
9294}
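// Example: "mov w0, w1" is really "orr w0, wzr, w1" (ORRWrs with a zero
// shift), so the checks above report a copy from w1 to w0; the extra operand
// tests reject the variant that is actually a zero-extending w-to-x move.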
9295
9296std::optional<DestSourcePair>
9297AArch64InstrInfo::isCopyLikeInstrImpl(const MachineInstr &MI) const {
9298 if (MI.getOpcode() == AArch64::ORRWrs &&
9299 MI.getOperand(1).getReg() == AArch64::WZR &&
9300 MI.getOperand(3).getImm() == 0x0)
9301 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
9302 return std::nullopt;
9303}
9304
9305std::optional<RegImmPair>
9306AArch64InstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const {
9307 int Sign = 1;
9308 int64_t Offset = 0;
9309
9310 // TODO: Handle cases where Reg is a super- or sub-register of the
9311 // destination register.
9312 const MachineOperand &Op0 = MI.getOperand(0);
9313 if (!Op0.isReg() || Reg != Op0.getReg())
9314 return std::nullopt;
9315
9316 switch (MI.getOpcode()) {
9317 default:
9318 return std::nullopt;
9319 case AArch64::SUBWri:
9320 case AArch64::SUBXri:
9321 case AArch64::SUBSWri:
9322 case AArch64::SUBSXri:
9323 Sign *= -1;
9324 [[fallthrough]];
9325 case AArch64::ADDSWri:
9326 case AArch64::ADDSXri:
9327 case AArch64::ADDWri:
9328 case AArch64::ADDXri: {
9329 // TODO: Third operand can be global address (usually some string).
9330 if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
9331 !MI.getOperand(2).isImm())
9332 return std::nullopt;
9333 int Shift = MI.getOperand(3).getImm();
9334 assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
9335 Offset = Sign * (MI.getOperand(2).getImm() << Shift);
9336 }
9337 }
9338 return RegImmPair{MI.getOperand(1).getReg(), Offset};
9339}
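// Illustrative results: "add x0, x1, #16" yields {x1, +16}, while
// "sub x0, x1, #1, lsl #12" yields {x1, -4096}; the shift operand can only be
// 0 or 12, so the immediate is either taken as-is or scaled by 4096.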
9340
9341/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
9342/// the destination register then, if possible, describe the value in terms of
9343/// the source register.
9344static std::optional<ParamLoadedValue>
9345describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg,
9346 const TargetInstrInfo *TII,
9347 const TargetRegisterInfo *TRI) {
9348 auto DestSrc = TII->isCopyLikeInstr(MI);
9349 if (!DestSrc)
9350 return std::nullopt;
9351
9352 Register DestReg = DestSrc->Destination->getReg();
9353 Register SrcReg = DestSrc->Source->getReg();
9354
9355 auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
9356
9357 // If the described register is the destination, just return the source.
9358 if (DestReg == DescribedReg)
9359 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
9360
9361 // ORRWrs zero-extends to 64-bits, so we need to consider such cases.
9362 if (MI.getOpcode() == AArch64::ORRWrs &&
9363 TRI->isSuperRegister(DestReg, DescribedReg))
9364 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
9365
9366 // We may need to describe the lower part of a ORRXrs move.
9367 if (MI.getOpcode() == AArch64::ORRXrs &&
9368 TRI->isSubRegister(DestReg, DescribedReg)) {
9369 Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
9370 return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
9371 }
9372
9373 assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
9374 "Unhandled ORR[XW]rs copy case");
9375
9376 return std::nullopt;
9377}
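// Example: for "mov w1, w0" (ORRWrs) a request to describe x1 is answered
// with w0, because the 32-bit move zero-extends into the full 64-bit
// register; for an ORRXrs copy, the lower half of the destination can be
// described via the sub_32 sub-register of the source.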
9378
9379bool AArch64InstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const {
9380 // Functions cannot be split to different sections on AArch64 if they have
9381 // a red zone. This is because relaxing a cross-section branch may require
9382 // incrementing the stack pointer to spill a register, which would overwrite
9383 // the red zone.
9384 if (MF.getInfo<AArch64FunctionInfo>()->hasRedZone().value_or(true))
9385 return false;
9386
9387 return TargetInstrInfo::isFunctionSafeToSplit(MF);
9388}
9389
9390bool AArch64InstrInfo::isMBBSafeToSplitToCold(
9391 const MachineBasicBlock &MBB) const {
9392 // Asm Goto blocks can contain conditional branches to goto labels, which can
9393 // get moved out of range of the branch instruction.
9394 auto isAsmGoto = [](const MachineInstr &MI) {
9395 return MI.getOpcode() == AArch64::INLINEASM_BR;
9396 };
9397 if (llvm::any_of(MBB, isAsmGoto) || MBB.isInlineAsmBrIndirectTarget())
9398 return false;
9399
9400 // Because jump tables are label-relative instead of table-relative, they all
9401 // must be in the same section or relocation fixup handling will fail.
9402
9403 // Check if MBB is a jump table target
9404 const MachineJumpTableInfo *MJTI = MBB.getParent()->getJumpTableInfo();
9405 auto containsMBB = [&MBB](const MachineJumpTableEntry &JTE) {
9406 return llvm::is_contained(JTE.MBBs, &MBB);
9407 };
9408 if (MJTI != nullptr && llvm::any_of(MJTI->getJumpTables(), containsMBB))
9409 return false;
9410
9411 // Check if MBB contains a jump table lookup
9412 for (const MachineInstr &MI : MBB) {
9413 switch (MI.getOpcode()) {
9414 case TargetOpcode::G_BRJT:
9415 case AArch64::JumpTableDest32:
9416 case AArch64::JumpTableDest16:
9417 case AArch64::JumpTableDest8:
9418 return false;
9419 default:
9420 continue;
9421 }
9422 }
9423
9424 // MBB isn't a special case, so it's safe to be split to the cold section.
9425 return true;
9426}
9427
9428std::optional<ParamLoadedValue>
9429AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
9430 Register Reg) const {
9431 const MachineFunction *MF = MI.getMF();
9432 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
9433 switch (MI.getOpcode()) {
9434 case AArch64::MOVZWi:
9435 case AArch64::MOVZXi: {
9436 // MOVZWi may be used for producing zero-extended 32-bit immediates in
9437 // 64-bit parameters, so we need to consider super-registers.
9438 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
9439 return std::nullopt;
9440
9441 if (!MI.getOperand(1).isImm())
9442 return std::nullopt;
9443 int64_t Immediate = MI.getOperand(1).getImm();
9444 int Shift = MI.getOperand(2).getImm();
9445 return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
9446 nullptr);
9447 }
9448 case AArch64::ORRWrs:
9449 case AArch64::ORRXrs:
9450 return describeORRLoadedValue(MI, Reg, this, TRI);
9451 }
9452
9453 return TargetInstrInfo::describeLoadedValue(MI, Reg);
9454}
9455
9456bool AArch64InstrInfo::isExtendLikelyToBeFolded(
9457 MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
9458 assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT ||
9459 ExtMI.getOpcode() == TargetOpcode::G_ZEXT ||
9460 ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);
9461
9462 // Anyexts are nops.
9463 if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
9464 return true;
9465
9466 Register DefReg = ExtMI.getOperand(0).getReg();
9467 if (!MRI.hasOneNonDBGUse(DefReg))
9468 return false;
9469
9470 // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
9471 // addressing mode.
9472 auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg);
9473 return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD;
9474}
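// Example: a G_SEXT whose only user is the offset operand of a G_PTR_ADD is
// reported as likely foldable, since the extend can usually be absorbed into
// a register-offset addressing mode such as [x0, w1, sxtw].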
9475
9476uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
9477 return get(Opc).TSFlags & AArch64::ElementSizeMask;
9478}
9479
9480bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
9481 return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
9482}
9483
9484bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
9485 return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
9486}
9487
9488unsigned int
9489AArch64InstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
9490 return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
9491}
9492
9493bool AArch64InstrInfo::isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
9494 unsigned Scale) const {
9495 if (Offset && Scale)
9496 return false;
9497
9498 // Check Reg + Imm
9499 if (!Scale) {
9500 // 9-bit signed offset
9501 if (isInt<9>(Offset))
9502 return true;
9503
9504 // 12-bit unsigned offset
9505 unsigned Shift = Log2_64(NumBytes);
9506 if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
9507 // Must be a multiple of NumBytes (NumBytes is a power of 2)
9508 (Offset >> Shift) << Shift == Offset)
9509 return true;
9510 return false;
9511 }
9512
9513 // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
9514 return Scale == 1 || (Scale > 0 && Scale == NumBytes);
9515}
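// Worked examples for the checks above with an 8-byte access (NumBytes == 8):
// Offset == -256 is accepted via the 9-bit signed form, Offset == 32760 via
// the scaled 12-bit unsigned form (32760 / 8 == 4095), Scale == 8 with
// Offset == 0 is accepted as reg + reg*8, and any mix of a nonzero Offset
// with a nonzero Scale is rejected.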
9516
9517unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
9518 if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
9519 return AArch64::BLRNoIP;
9520 else
9521 return AArch64::BLR;
9522}
9523
9524MachineBasicBlock::iterator
9525AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
9526 Register TargetReg, bool FrameSetup) const {
9527 assert(TargetReg != AArch64::SP && "New top of stack cannot already be in SP");
9528
9529 MachineBasicBlock &MBB = *MBBI->getParent();
9530 MachineFunction &MF = *MBB.getParent();
9531 const AArch64InstrInfo *TII =
9532 MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
9533 int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
9534 DebugLoc DL = MBB.findDebugLoc(MBBI);
9535
9536 MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
9537 MachineBasicBlock *LoopTestMBB =
9538 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
9539 MF.insert(MBBInsertPoint, LoopTestMBB);
9540 MachineBasicBlock *LoopBodyMBB =
9541 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
9542 MF.insert(MBBInsertPoint, LoopBodyMBB);
9543 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
9544 MF.insert(MBBInsertPoint, ExitMBB);
9545 MachineInstr::MIFlag Flags =
9546 FrameSetup ? MachineInstr::FrameSetup : MachineInstr::NoFlags;
9547
9548 // LoopTest:
9549 // SUB SP, SP, #ProbeSize
9550 emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, AArch64::SP,
9551 AArch64::SP, StackOffset::getFixed(-ProbeSize), TII, Flags);
9552
9553 // CMP SP, TargetReg
9554 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
9555 AArch64::XZR)
9556 .addReg(AArch64::SP)
9557 .addReg(TargetReg)
9558 .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
9559 .setMIFlags(Flags);
9560
9561 // B.<Cond> LoopExit
9562 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
9563 .addImm(AArch64CC::LE)
9564 .addMBB(ExitMBB)
9565 .setMIFlags(Flags);
9566
9567 // STR XZR, [SP]
9568 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui))
9569 .addReg(AArch64::XZR)
9570 .addReg(AArch64::SP)
9571 .addImm(0)
9572 .setMIFlags(Flags);
9573
9574 // B loop
9575 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B))
9576 .addMBB(LoopTestMBB)
9577 .setMIFlags(Flags);
9578
9579 // LoopExit:
9580 // MOV SP, TargetReg
9581 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::ADDXri), AArch64::SP)
9582 .addReg(TargetReg)
9583 .addImm(0)
9584 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
9585 .setMIFlags(Flags);
9586
9587 // LDR XZR, [SP]
9588 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::LDRXui))
9589 .addReg(AArch64::XZR, RegState::Define)
9590 .addReg(AArch64::SP)
9591 .addImm(0)
9592 .setMIFlags(Flags);
9593
9594 ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
9595 ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
9596
9597 LoopTestMBB->addSuccessor(ExitMBB);
9598 LoopTestMBB->addSuccessor(LoopBodyMBB);
9599 LoopBodyMBB->addSuccessor(LoopTestMBB);
9600 MBB.addSuccessor(LoopTestMBB);
9601
9602 // Update liveins.
9603 if (MF.getRegInfo().reservedRegsFrozen())
9604 fullyRecomputeLiveIns({ExitMBB, LoopBodyMBB, LoopTestMBB});
9605
9606 return ExitMBB->begin();
9607}
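// The loop built above touches the stack once per ProbeSize step (the
// "STR XZR, [SP]" in the loop body), so each guard page is hit before SP is
// moved past it, and the final "LDR XZR, [SP]" probes the page that SP lands
// on once it has been set to TargetReg.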
9608
9609namespace {
9610class AArch64PipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
9611 MachineFunction *MF;
9612 const TargetInstrInfo *TII;
9613 const TargetRegisterInfo *TRI;
9614 MachineRegisterInfo &MRI;
9615
9616 /// The block of the loop
9617 MachineBasicBlock *LoopBB;
9618 /// The conditional branch of the loop
9619 MachineInstr *CondBranch;
9620 /// The compare instruction for loop control
9621 MachineInstr *Comp;
9622 /// The number of the operand of the loop counter value in Comp
9623 unsigned CompCounterOprNum;
9624 /// The instruction that updates the loop counter value
9625 MachineInstr *Update;
9626 /// The number of the operand of the loop counter value in Update
9627 unsigned UpdateCounterOprNum;
9628 /// The initial value of the loop counter
9629 Register Init;
9630 /// True iff Update is a predecessor of Comp
9631 bool IsUpdatePriorComp;
9632
9633 /// The normalized condition used by createTripCountGreaterCondition()
9634 SmallVector<MachineOperand, 4> Cond;
9635
9636public:
9637 AArch64PipelinerLoopInfo(MachineBasicBlock *LoopBB, MachineInstr *CondBranch,
9638 MachineInstr *Comp, unsigned CompCounterOprNum,
9639 MachineInstr *Update, unsigned UpdateCounterOprNum,
9640 Register Init, bool IsUpdatePriorComp,
9641 const SmallVectorImpl<MachineOperand> &Cond)
9642 : MF(Comp->getParent()->getParent()),
9643 TII(MF->getSubtarget().getInstrInfo()),
9644 TRI(MF->getSubtarget().getRegisterInfo()), MRI(MF->getRegInfo()),
9645 LoopBB(LoopBB), CondBranch(CondBranch), Comp(Comp),
9646 CompCounterOprNum(CompCounterOprNum), Update(Update),
9647 UpdateCounterOprNum(UpdateCounterOprNum), Init(Init),
9648 IsUpdatePriorComp(IsUpdatePriorComp), Cond(Cond.begin(), Cond.end()) {}
9649
9650 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
9651 // Make the instructions for loop control be placed in stage 0.
9652 // The predecessors of Comp are considered by the caller.
9653 return MI == Comp;
9654 }
9655
9656 std::optional<bool> createTripCountGreaterCondition(
9657 int TC, MachineBasicBlock &MBB,
9658 SmallVectorImpl<MachineOperand> &CondParam) override {
9659 // A branch instruction will be inserted as "if (Cond) goto epilogue".
9660 // Cond is normalized for such use.
9661 // The predecessors of the branch are assumed to have already been inserted.
9662 CondParam = Cond;
9663 return {};
9664 }
9665
9666 void createRemainingIterationsGreaterCondition(
9667 int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
9668 DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) override;
9669
9670 void setPreheader(MachineBasicBlock *NewPreheader) override {}
9671
9672 void adjustTripCount(int TripCountAdjust) override {}
9673
9674 void disposed() override {}
9675 bool isMVEExpanderSupported() override { return true; }
9676};
9677} // namespace
9678
9679/// Clone an instruction from MI. The register of the ReplaceOprNum-th operand
9680/// is replaced by ReplaceReg. The output register is newly created.
9681/// The other operands are unchanged from MI.
9682static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum,
9683 Register ReplaceReg, MachineBasicBlock &MBB,
9684 MachineBasicBlock::iterator InsertTo) {
9685 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
9686 const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
9687 const TargetRegisterInfo *TRI =
9688 MBB.getParent()->getSubtarget().getRegisterInfo();
9689 MachineInstr *NewMI = MBB.getParent()->CloneMachineInstr(MI);
9690 Register Result = 0;
9691 for (unsigned I = 0; I < NewMI->getNumOperands(); ++I) {
9692 if (I == 0 && NewMI->getOperand(0).getReg().isVirtual()) {
9693 Result = MRI.createVirtualRegister(
9694 MRI.getRegClass(NewMI->getOperand(0).getReg()));
9695 NewMI->getOperand(I).setReg(Result);
9696 } else if (I == ReplaceOprNum) {
9697 MRI.constrainRegClass(
9698 ReplaceReg,
9699 TII->getRegClass(NewMI->getDesc(), I, TRI, *MBB.getParent()));
9700 NewMI->getOperand(I).setReg(ReplaceReg);
9701 }
9702 }
9703 MBB.insert(InsertTo, NewMI);
9704 return Result;
9705}
9706
9707void AArch64PipelinerLoopInfo::createRemainingIterationsGreaterCondition(
9708 int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
9709 DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) {
9710 // Create and accumulate conditions for next TC iterations.
9711 // Example:
9712 // SUBSXrr N, counter, implicit-def $nzcv # compare instruction for the last
9713 // # iteration of the kernel
9714 //
9715 // # insert the following instructions
9716 // cond = CSINCXr 0, 0, C, implicit $nzcv
9717 // counter = ADDXri counter, 1 # clone from this->Update
9718 // SUBSXrr n, counter, implicit-def $nzcv # clone from this->Comp
9719 // cond = CSINCXr cond, cond, C, implicit $nzcv
9720 // ... (repeat TC times)
9721 // SUBSXri cond, 0, implicit-def $nzcv
9722
9723 assert(CondBranch->getOpcode() == AArch64::Bcc);
9724 // CondCode to exit the loop
9725 AArch64CC::CondCode CC =
9726 (AArch64CC::CondCode)CondBranch->getOperand(0).getImm();
9727 if (CondBranch->getOperand(1).getMBB() == LoopBB)
9728 CC = AArch64CC::getInvertedCondCode(CC);
9729
9730 // Accumulate conditions to exit the loop
9731 Register AccCond = AArch64::XZR;
9732
9733 // If CC holds, CurCond+1 is returned; otherwise CurCond is returned.
9734 auto AccumulateCond = [&](Register CurCond,
9735 AArch64CC::CondCode CC) {
9736 Register NewCond = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
9737 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::CSINCXr))
9738 .addReg(NewCond, RegState::Define)
9739 .addReg(CurCond)
9740 .addReg(CurCond)
9741 .addImm(AArch64CC::getInvertedCondCode(CC));
9742 return NewCond;
9743 };
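// CSINC Xd, Xn, Xm, cc returns Xn when cc holds and Xm + 1 otherwise, so by
// passing the inverted exit condition the lambda increments the accumulator
// exactly for the iterations in which CC would leave the loop, matching the
// comment above.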
9744
9745 if (!LastStage0Insts.empty() && LastStage0Insts[Comp]->getParent() == &MBB) {
9746 // Update and Comp for I==0 already exist in MBB
9747 // (MBB is an unrolled kernel)
9748 Register Counter;
9749 for (int I = 0; I <= TC; ++I) {
9750 Register NextCounter;
9751 if (I != 0)
9752 NextCounter =
9753 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
9754
9755 AccCond = AccumulateCond(AccCond, CC);
9756
9757 if (I != TC) {
9758 if (I == 0) {
9759 if (Update != Comp && IsUpdatePriorComp) {
9760 Counter =
9761 LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
9762 NextCounter = cloneInstr(Update, UpdateCounterOprNum, Counter, MBB,
9763 MBB.end());
9764 } else {
9765 // We can reuse the value that was already calculated.
9766 NextCounter = LastStage0Insts[Update]->getOperand(0).getReg();
9767 }
9768 } else if (Update != Comp) {
9769 NextCounter =
9770 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
9771 }
9772 }
9773 Counter = NextCounter;
9774 }
9775 } else {
9776 Register Counter;
9777 if (LastStage0Insts.empty()) {
9778 // Use the initial counter value (testing whether the trip count is
9779 // sufficient for the pipelined code to execute).
9780 Counter = Init;
9781 if (IsUpdatePriorComp)
9782 Counter =
9783 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
9784 } else {
9785 // MBB is an epilogue block. LastStage0Insts[Comp] is in the kernel block.
9786 Counter = LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
9787 }
9788
9789 for (int I = 0; I <= TC; ++I) {
9790 Register NextCounter;
9791 NextCounter =
9792 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
9793 AccCond = AccumulateCond(AccCond, CC);
9794 if (I != TC && Update != Comp)
9795 NextCounter =
9796 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
9797 Counter = NextCounter;
9798 }
9799 }
9800
9801 // If AccCond == 0, the remainder is greater than TC.
9802 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::SUBSXri))
9803 .addReg(AArch64::XZR, RegState::Define | RegState::Dead)
9804 .addReg(AccCond)
9805 .addImm(0)
9806 .addImm(0);
9807 Cond.clear();
9809}
9810
9811static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB,
9812 Register &RegMBB, Register &RegOther) {
9813 assert(Phi.getNumOperands() == 5);
9814 if (Phi.getOperand(2).getMBB() == MBB) {
9815 RegMBB = Phi.getOperand(1).getReg();
9816 RegOther = Phi.getOperand(3).getReg();
9817 } else {
9818 assert(Phi.getOperand(4).getMBB() == MBB);
9819 RegMBB = Phi.getOperand(3).getReg();
9820 RegOther = Phi.getOperand(1).getReg();
9821 }
9822}
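// Machine-level PHIs here have the shape "%dst = PHI %v1, %bb1, %v2, %bb2"
// (five operands including the def), so operands 1/3 are the incoming values
// and operands 2/4 their source blocks; RegMBB is the value coming from MBB
// and RegOther the value from the other predecessor.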
9823
9824static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB) {
9825 if (!Reg.isVirtual())
9826 return false;
9827 const MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
9828 return MRI.getVRegDef(Reg)->getParent() != BB;
9829}
9830
9831/// If Reg is an induction variable, return true and fill in the update instruction, its counter operand index, the initial value, and whether the update precedes the compare.
9832static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB,
9833 MachineInstr *&UpdateInst,
9834 unsigned &UpdateCounterOprNum, Register &InitReg,
9835 bool &IsUpdatePriorComp) {
9836 // Example:
9837 //
9838 // Preheader:
9839 // InitReg = ...
9840 // LoopBB:
9841 // Reg0 = PHI (InitReg, Preheader), (Reg1, LoopBB)
9842 // Reg = COPY Reg0 ; COPY is ignored.
9843 // Reg1 = ADD Reg, #1; UpdateInst. Incremented by a loop invariant value.
9844 // ; Reg is the value calculated in the previous
9845 // ; iteration, so IsUpdatePriorComp == false.
9846
9847 if (LoopBB->pred_size() != 2)
9848 return false;
9849 if (!Reg.isVirtual())
9850 return false;
9851 const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
9852 UpdateInst = nullptr;
9853 UpdateCounterOprNum = 0;
9854 InitReg = 0;
9855 IsUpdatePriorComp = true;
9856 Register CurReg = Reg;
9857 while (true) {
9858 MachineInstr *Def = MRI.getVRegDef(CurReg);
9859 if (Def->getParent() != LoopBB)
9860 return false;
9861 if (Def->isCopy()) {
9862 // Ignore copy instructions unless they contain subregisters
9863 if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
9864 return false;
9865 CurReg = Def->getOperand(1).getReg();
9866 } else if (Def->isPHI()) {
9867 if (InitReg != 0)
9868 return false;
9869 if (!UpdateInst)
9870 IsUpdatePriorComp = false;
9871 extractPhiReg(*Def, LoopBB, CurReg, InitReg);
9872 } else {
9873 if (UpdateInst)
9874 return false;
9875 switch (Def->getOpcode()) {
9876 case AArch64::ADDSXri:
9877 case AArch64::ADDSWri:
9878 case AArch64::SUBSXri:
9879 case AArch64::SUBSWri:
9880 case AArch64::ADDXri:
9881 case AArch64::ADDWri:
9882 case AArch64::SUBXri:
9883 case AArch64::SUBWri:
9884 UpdateInst = Def;
9885 UpdateCounterOprNum = 1;
9886 break;
9887 case AArch64::ADDSXrr:
9888 case AArch64::ADDSWrr:
9889 case AArch64::SUBSXrr:
9890 case AArch64::SUBSWrr:
9891 case AArch64::ADDXrr:
9892 case AArch64::ADDWrr:
9893 case AArch64::SUBXrr:
9894 case AArch64::SUBWrr:
9895 UpdateInst = Def;
9896 if (isDefinedOutside(Def->getOperand(2).getReg(), LoopBB))
9897 UpdateCounterOprNum = 1;
9898 else if (isDefinedOutside(Def->getOperand(1).getReg(), LoopBB))
9899 UpdateCounterOprNum = 2;
9900 else
9901 return false;
9902 break;
9903 default:
9904 return false;
9905 }
9906 CurReg = Def->getOperand(UpdateCounterOprNum).getReg();
9907 }
9908
9909 if (!CurReg.isVirtual())
9910 return false;
9911 if (Reg == CurReg)
9912 break;
9913 }
9914
9915 if (!UpdateInst)
9916 return false;
9917
9918 return true;
9919}
9920
9921std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
9922AArch64InstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
9923 // Accept loops that meet the following conditions
9924 // * The conditional branch is BCC
9925 // * The compare instruction is ADDS/SUBS/WHILEXX
9926 // * One operand of the compare is an induction variable and the other is a
9927 // loop invariant value
9928 // * The induction variable is incremented/decremented by a single instruction
9929 // * Does not contain CALL or instructions which have unmodeled side effects
9930
9931 for (MachineInstr &MI : *LoopBB)
9932 if (MI.isCall() || MI.hasUnmodeledSideEffects())
9933 // This instruction may use NZCV, which interferes with the instruction to
9934 // be inserted for loop control.
9935 return nullptr;
9936
9937 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
9938 SmallVector<MachineOperand, 4> Cond;
9939 if (analyzeBranch(*LoopBB, TBB, FBB, Cond))
9940 return nullptr;
9941
9942 // Infinite loops are not supported
9943 if (TBB == LoopBB && FBB == LoopBB)
9944 return nullptr;
9945
9946 // Must be conditional branch
9947 if (TBB != LoopBB && FBB == nullptr)
9948 return nullptr;
9949
9950 assert((TBB == LoopBB || FBB == LoopBB) &&
9951 "The Loop must be a single-basic-block loop");
9952
9953 MachineInstr *CondBranch = &*LoopBB->getFirstTerminator();
9954 const TargetRegisterInfo &TRI = getRegisterInfo();
9955
9956 if (CondBranch->getOpcode() != AArch64::Bcc)
9957 return nullptr;
9958
9959 // Normalization for createTripCountGreaterCondition()
9960 if (TBB == LoopBB)
9961 reverseBranchCondition(Cond);
9962
9963 MachineInstr *Comp = nullptr;
9964 unsigned CompCounterOprNum = 0;
9965 for (MachineInstr &MI : reverse(*LoopBB)) {
9966 if (MI.modifiesRegister(AArch64::NZCV, &TRI)) {
9967 // Guarantee that the compare is SUBS/ADDS/WHILEXX and that one of the
9968 // operands is a loop invariant value
9969
9970 switch (MI.getOpcode()) {
9971 case AArch64::SUBSXri:
9972 case AArch64::SUBSWri:
9973 case AArch64::ADDSXri:
9974 case AArch64::ADDSWri:
9975 Comp = &MI;
9976 CompCounterOprNum = 1;
9977 break;
9978 case AArch64::ADDSWrr:
9979 case AArch64::ADDSXrr:
9980 case AArch64::SUBSWrr:
9981 case AArch64::SUBSXrr:
9982 Comp = &MI;
9983 break;
9984 default:
9985 if (isWhileOpcode(MI.getOpcode())) {
9986 Comp = &MI;
9987 break;
9988 }
9989 return nullptr;
9990 }
9991
9992 if (CompCounterOprNum == 0) {
9993 if (isDefinedOutside(Comp->getOperand(1).getReg(), LoopBB))
9994 CompCounterOprNum = 2;
9995 else if (isDefinedOutside(Comp->getOperand(2).getReg(), LoopBB))
9996 CompCounterOprNum = 1;
9997 else
9998 return nullptr;
9999 }
10000 break;
10001 }
10002 }
10003 if (!Comp)
10004 return nullptr;
10005
10006 MachineInstr *Update = nullptr;
10007 Register Init;
10008 bool IsUpdatePriorComp;
10009 unsigned UpdateCounterOprNum;
10010 if (!getIndVarInfo(Comp->getOperand(CompCounterOprNum).getReg(), LoopBB,
10011 Update, UpdateCounterOprNum, Init, IsUpdatePriorComp))
10012 return nullptr;
10013
10014 return std::make_unique<AArch64PipelinerLoopInfo>(
10015 LoopBB, CondBranch, Comp, CompCounterOprNum, Update, UpdateCounterOprNum,
10016 Init, IsUpdatePriorComp, Cond);
10017}
10018
10019#define GET_INSTRINFO_HELPERS
10020#define GET_INSTRMAP_INFO
10021#include "AArch64GenInstrInfo.inc"
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, unsigned NumRegs)
static cl::opt< unsigned > BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of Bcc instructions (DEBUG)"))
static void genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, unsigned IdxOpd1, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg)
Do the following transformation A - (B + C) ==> (A - B) - C A - (B + C) ==> (A - C) - B.
static bool getMaddPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find instructions that can be turned into madd.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr)
Find a condition code used by the instruction.
static MachineInstr * genFusedMultiplyAcc(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyAcc - Helper to generate fused multiply accumulate instructions.
static bool isCombineInstrCandidate64(unsigned Opc)
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg)
static bool areCFlagsAccessedBetweenInstrs(MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, const TargetRegisterInfo *TRI, const AccessKind AccessToCheck=AK_All)
True when condition flags are accessed (either by writing or reading) on the instruction trace starti...
static bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Floating-Point Support.
static bool isADDSRegImm(unsigned Opcode)
static MachineInstr * genFusedMultiplyIdxNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static unsigned sForm(MachineInstr &Instr)
Get opcode of S version of Instr.
static bool isCombineInstrSettingFlag(unsigned Opc)
@ AK_Write
static bool getFNEGPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB, MachineInstr *&UpdateInst, unsigned &UpdateCounterOprNum, Register &InitReg, bool &IsUpdatePriorComp)
If Reg is an induction variable, return true and set some parameters.
static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc)
static int findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr)
static unsigned getBranchDisplacementBits(unsigned Opc)
static std::optional< ParamLoadedValue > describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
If the given ORR instruction is a copy, and DescribedReg overlaps with the destination register then,...
static bool getFMULPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static MachineInstr * genFusedMultiplyAccNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static void appendVGScaledOffsetExpr(SmallVectorImpl< char > &Expr, int NumBytes, int NumVGScaledBytes, unsigned VG, llvm::raw_string_ostream &Comment)
static MachineInstr * genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR, const TargetRegisterClass *RC)
genMaddR - Generate madd instruction and combine mul and add using an extra virtual register Example ...
static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum, Register ReplaceReg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertTo)
Clone an instruction from MI.
static bool scaleOffset(unsigned Opc, int64_t &Offset)
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc)
unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale)
static MachineInstr * genFusedMultiplyIdx(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyIdx - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genIndexedMultiply(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxDupOp, unsigned MulOpc, const TargetRegisterClass *RC, MachineRegisterInfo &MRI)
Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
static bool isSUBSRegImm(unsigned Opcode)
static bool UpdateOperandRegClass(MachineInstr &Instr)
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr, int CmpValue, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > &CCUseInstrs, bool &IsInvertCC)
unsigned unscaledOffsetOpcode(unsigned Opcode)
static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI)
Check if CmpInstr can be substituted by MI.
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC)
static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg, unsigned MnegOpc, const TargetRegisterClass *RC)
genNeg - Helper to generate an intermediate negation of the second operand of Root
static bool isCombineInstrCandidateFP(const MachineInstr &Inst)
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc, unsigned ZeroReg=0, bool CheckZeroReg=false)
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI)
Return the opcode that does not set flags when possible - otherwise return the original opcode.
static const MachineInstrBuilder & AddSubReg(const MachineInstrBuilder &MIB, unsigned Reg, unsigned SubIdx, unsigned State, const TargetRegisterInfo *TRI)
static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool isCombineInstrCandidate32(unsigned Opc)
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, SmallVectorImpl< MachineOperand > &Cond)
static unsigned offsetExtendOpcode(unsigned Opcode)
MachineOutlinerMBBFlags
@ LRUnavailableSomewhere
@ HasCalls
@ UnsafeRegsDead
static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register DestReg, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
static bool getMiscPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find other MI combine patterns.
static bool outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1, int64_t Offset1, unsigned Opcode1, int FI2, int64_t Offset2, unsigned Opcode2)
static cl::opt< unsigned > TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"))
static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB, Register &RegMBB, Register &RegOther)
static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI, unsigned Reg, const StackOffset &Offset)
static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB)
static MachineInstr * genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC, FMAInstKind kind=FMAInstKind::Default, const Register *ReplacedAddend=nullptr)
genFusedMultiply - Generate fused multiply instructions.
static bool isCombineInstrCandidate(unsigned Opc)
static unsigned regOffsetOpcode(unsigned Opcode)
MachineOutlinerClass
Constants defining how certain sequences should be outlined.
@ MachineOutlinerTailCall
Emit a save, restore, call, and return.
@ MachineOutlinerRegSave
Emit a call and tail-call.
@ MachineOutlinerNoLRSave
Only emit a branch.
@ MachineOutlinerThunk
Emit a call and return.
@ MachineOutlinerDefault
static cl::opt< unsigned > BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26), cl::desc("Restrict range of B instructions (DEBUG)"))
static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB)
Check if AArch64::NZCV should be alive in successors of MBB.
static void emitFrameOffsetAdj(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, int64_t Offset, unsigned Opc, const TargetInstrInfo *TII, MachineInstr::MIFlag Flag, bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFAOffset, StackOffset CFAOffset, unsigned FrameReg)
static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize)
static cl::opt< unsigned > CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"))
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, unsigned *NewVReg=nullptr)
static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB, const AArch64InstrInfo *TII, bool ShouldSignReturnAddr)
static MachineInstr * genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs)
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc, unsigned ZeroReg)
static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register SrcReg, bool IsKill, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
BlockVerifier::State From
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Size
bool End
Definition: ELF_riscv.cpp:480
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static LVOptions Options
Definition: LVOptions.cpp:25
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
Module.h This file contains the declarations for the Module class.
const char LLVMTargetMachineRef TM
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file contains some templates that are useful if you are working with the STL at all.
This file defines the SmallVector class.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
bool shouldSignReturnAddress(const MachineFunction &MF) const
const SetOfInstructions & getLOHRelated() const
bool needsDwarfUnwindInfo(const MachineFunction &MF) const
void setOutliningStyle(std::string Style)
std::optional< bool > hasRedZone() const
static bool isHForm(const MachineInstr &MI)
Returns whether the instruction is in H form (16 bit operands)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool hasBTISemantics(const MachineInstr &MI)
Returns whether the instruction can be compatible with non-zero BTYPE.
static bool isQForm(const MachineInstr &MI)
Returns whether the instruction is in Q form (128 bit operands)
static void decomposeStackOffsetForFrameOffsets(const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors, int64_t &NumDataVectors)
Returns the offset in parts to which this frame offset can be decomposed for the purpose of describin...
static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width, int64_t &MinOffset, int64_t &MaxOffset)
Returns true if opcode Opc is a memory operation.
static bool isTailCallReturnInst(const MachineInstr &MI)
Returns true if MI is one of the TCRETURN* instructions.
static bool isFPRCopy(const MachineInstr &MI)
Does this instruction rename an FPR without modifying bits?
MachineInstr * emitLdStWithAddr(MachineInstr &MemI, const ExtAddrMode &AM) const override
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to an...
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
std::optional< RegImmPair > isAddImmediate(const MachineInstr &MI, Register Reg) const override
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
uint64_t getElementSizeForOpcode(unsigned Opc) const
Returns the vector element size (B, H, S or D) of an SVE opcode.
outliner::InstrType getOutliningTypeImpl(MachineBasicBlock::iterator &MIT, unsigned Flags) const override
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc) const override
static bool isGPRCopy(const MachineInstr &MI)
Does this instruction rename a GPR without modifying bits?
static unsigned convertToFlagSettingOpc(unsigned Opc)
Return the opcode that set flags when possible.
bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const override
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operator of a load/store.
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
bool isWhileOpcode(unsigned Opc) const
Returns true if the opcode is for an SVE WHILE## instruction.
static std::optional< unsigned > getUnscaledLdSt(unsigned Opc)
Returns the unscaled load/store for the scaled load/store opcode, if there is a corresponding unscale...
static bool hasUnscaledLdStOffset(unsigned Opc)
Return true if it has an unscaled load/store offset.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
MachineBasicBlock::iterator insertOutlinedCall(Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It, MachineFunction &MF, outliner::Candidate &C) const override
std::optional< ExtAddrMode > getAddrModeFromMemoryOp(const MachineInstr &MemI, const TargetRegisterInfo *TRI) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &MI, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
bool analyzeBranchPredicate(MachineBasicBlock &MBB, MachineBranchPredicate &MBP, bool AllowModify) const override
static bool isSEHInstruction(const MachineInstr &MI)
Return true if the instructions is a SEH instruciton used for unwinding on Windows.
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
SmallVector< std::pair< MachineBasicBlock::iterator, MachineBasicBlock::iterator > > getOutlinableRanges(MachineBasicBlock &MBB, unsigned &Flags) const override
static bool isPairableLdStInst(const MachineInstr &MI)
Return true if pairing the given load or store may be paired with another.
void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstrIdxForVirtReg) const override
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
const AArch64RegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
static bool isPreSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed store.
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
AArch64InstrInfo(const AArch64Subtarget &STI)
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
bool useMachineCombiner() const override
AArch64 supports MachineCombiner.
ArrayRef< std::pair< MachineMemOperand::Flags, const char * > > getSerializableMachineMemOperandTargetFlags() const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
bool isExtendLikelyToBeFolded(MachineInstr &ExtMI, MachineRegisterInfo &MRI) const override
static bool isFalkorShiftExtFast(const MachineInstr &MI)
Returns true if the instruction has a shift by immediate that can be executed in one cycle less.
std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const override
bool getMemOperandWithOffsetWidth(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, TypeSize &Width, const TargetRegisterInfo *TRI) const
If OffsetIsScalable is set to 'true', the offset is scaled by vscale.
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isStridedAccess(const MachineInstr &MI)
Return true if the given load or store is a strided memory access.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
Detect opportunities for ldp/stp formation.
bool expandPostRAPseudo(MachineInstr &MI) const override
unsigned int getTailDuplicateSize(CodeGenOptLevel OptLevel) const override
bool isFunctionSafeToOutlineFrom(MachineFunction &MF, bool OutlineFromLinkOnceODRs) const override
static bool isFpOrNEON(const MachineInstr &MI)
Returns whether the instruction is FP or NEON.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg) const override
bool shouldOutlineFromFunctionByDefault(MachineFunction &MF) const override
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
bool isThroughputPattern(unsigned Pattern) const override
Return true when a code sequence can improve throughput.
void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, unsigned DestReg, unsigned SrcReg, bool KillSrc, unsigned Opcode, unsigned ZeroReg, llvm::ArrayRef< unsigned > Indices) const
void buildClearRegister(Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator Iter, DebugLoc &DL, bool AllowSideEffects=true) const override
void buildOutlinedFrame(MachineBasicBlock &MBB, MachineFunction &MF, const outliner::OutlinedFunction &OF) const override
ArrayRef< std::pair< unsigned, const char * > > getSerializableDirectMachineOperandTargetFlags() const override
MachineOperand & getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const
Return the immediate offset of the base register in a load/store LdSt.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, const MachineInstr &AddrI, ExtAddrMode &AM) const override
static bool isLdStPairSuppressed(const MachineInstr &MI)
Return true if pairing the given load or store is hinted to be unprofitable.
bool isFunctionSafeToSplit(const MachineFunction &MF) const override
bool isAssociativeAndCommutative(const MachineInstr &Inst, bool Invert) const override
Return true when Inst is associative and commutative so that it can be reassociated.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
MachineBasicBlock::iterator probedStackAlloc(MachineBasicBlock::iterator MBBI, Register TargetReg, bool FrameSetup) const
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
optimizeCompareInstr - Convert the instruction supplying the argument to the comparison into one that...
std::optional< outliner::OutlinedFunction > getOutliningCandidateInfo(std::vector< outliner::Candidate > &RepeatedSequenceLocs) const override
static unsigned getLoadStoreImmIdx(unsigned Opc)
Returns the index for the immediate for a given instruction.
static bool isGPRZero(const MachineInstr &MI)
Does this instruction set its full destination register to zero?
std::pair< unsigned, unsigned > decomposeMachineOperandsTargetFlags(unsigned TF) const override
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2,...
CombinerObjective getCombinerObjective(unsigned Pattern) const override
bool isMBBSafeToSplitToCold(const MachineBasicBlock &MBB) const override
bool isAsCheapAsAMove(const MachineInstr &MI) const override
bool isLegalAddressingMode(unsigned NumBytes, int64_t Offset, unsigned Scale) const
std::optional< DestSourcePair > isCopyLikeInstrImpl(const MachineInstr &MI) const override
static void suppressLdStPair(MachineInstr &MI)
Hint that pairing the given load or store is unprofitable.
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static bool isPreLd(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load.
ArrayRef< std::pair< unsigned, const char * > > getSerializableBitmaskMachineOperandTargetFlags() const override
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, llvm::ArrayRef< unsigned > Indices) const
bool optimizeCondBranch(MachineInstr &MI) const override
Replace csincr-branch sequence by simple conditional branch.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
bool isCandidateToMergeOrPair(const MachineInstr &MI) const
Return true if this is a load/store that can be potentially paired/merged.
MCInst getNop() const override
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operand of a load/store.
bool isPTestLikeOpcode(unsigned Opc) const
Returns true if the opcode is for an SVE instruction that sets the condition codes as if its results...
void mergeOutliningCandidateAttributes(Function &F, std::vector< outliner::Candidate > &Candidates) const override
static void decomposeStackOffsetForDwarfOffsets(const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized)
bool isReservedReg(const MachineFunction &MF, MCRegister Reg) const
const AArch64RegisterInfo * getRegisterInfo() const override
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
bool isSVEorStreamingSVEAvailable() const
Returns true if the target has access to either the full range of SVE instructions,...
unsigned ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be re...
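The analyzeBranch, removeBranch and insertBranch overrides listed above follow the generic TargetInstrInfo contract. A minimal, hypothetical sketch of how a client redirects a block's terminators (the helper name redirectTerminators and the NewTarget variable are illustrative, not part of this file):
  static void redirectTerminators(const TargetInstrInfo *TII, MachineBasicBlock &MBB,
                                  MachineBasicBlock *NewTarget, const DebugLoc &DL) {
    MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
    SmallVector<MachineOperand, 4> Cond;
    // analyzeBranch returns false when it understood the terminators of MBB.
    if (!TII->analyzeBranch(MBB, TBB, FBB, Cond, /*AllowModify=*/false)) {
      TII->removeBranch(MBB);                               // drop the old terminators
      TII->insertBranch(MBB, NewTarget, nullptr, Cond, DL); // re-emit towards NewTarget
    }
  }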
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
const T & front() const
front - Get the first element.
Definition: ArrayRef.h:168
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
bool empty() const
Definition: DenseMap.h:98
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition: DenseMap.h:220
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition: Function.h:698
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition: Function.h:695
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
A set of register units used to track register liveness.
Definition: LiveRegUnits.h:30
bool available(MCPhysReg Reg) const
Returns true if no part of physical register Reg is live.
Definition: LiveRegUnits.h:116
void stepBackward(const MachineInstr &MI)
Updates liveness when stepping backwards over the instruction MI.
void addLiveOuts(const MachineBasicBlock &MBB)
Adds registers living out of block MBB.
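A hedged sketch of the usual LiveRegUnits idiom behind addLiveOuts/stepBackward/available: roll liveness back from the end of the block to recover the state just before a given instruction, then ask whether a scratch register (X16 here, chosen purely for illustration) is free at that point.
  static bool isX16FreeBefore(const MachineInstr &Pos, const TargetRegisterInfo &TRI) {
    const MachineBasicBlock &MBB = *Pos.getParent();
    LiveRegUnits LRU(TRI);
    LRU.addLiveOuts(MBB);                   // seed with registers live out of the block
    for (const MachineInstr &MI : llvm::reverse(MBB)) {
      LRU.stepBackward(MI);                 // roll liveness back across MI
      if (&MI == &Pos)
        break;                              // state now describes the point before Pos
    }
    return LRU.available(AArch64::X16);     // true if no part of X16 is live here
  }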
static LocationSize precise(uint64_t Value)
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition: MCAsmInfo.h:56
bool usesWindowsCFI() const
Definition: MCAsmInfo.h:799
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition: MCDwarf.h:583
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition: MCDwarf.h:556
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition: MCDwarf.h:541
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition: MCDwarf.h:647
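The MCCFIInstruction factory functions above are normally paired with MachineFunction::addFrameInst and a CFI_INSTRUCTION pseudo. A hedged sketch (the wrapper name emitDefCfaOffset is illustrative):
  static void emitDefCfaOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                               const DebugLoc &DL, const TargetInstrInfo &TII, int Offset) {
    MachineFunction &MF = *MBB.getParent();
    unsigned CFIIndex =
        MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(/*L=*/nullptr, Offset));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlag(MachineInstr::FrameSetup);
  }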
MCInstBuilder & addImm(int64_t Val)
Add a new integer immediate operand.
Definition: MCInstBuilder.h:43
Instances of this class represent a single low-level machine instruction.
Definition: MCInst.h:184
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
MCRegisterInfo base class - We assume that the target defines a static array of MCRegisterDesc object...
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
constexpr bool isValid() const
Definition: MCRegister.h:81
static constexpr unsigned NoRegister
Definition: MCRegister.h:52
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition: Metadata.h:1541
Set of metadata that should be preserved when using BuildMI().
bool isInlineAsmBrIndirectTarget() const
Returns true if this is the indirect dest of an INLINEASM_BR.
unsigned pred_size() const
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor bloc...
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
reverse_instr_iterator instr_rbegin()
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
bool isLiveIn(MCPhysReg Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
MBBSectionID getSectionID() const
Returns the section ID of this basic block.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
reverse_instr_iterator instr_rend()
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
Instructions::iterator instr_iterator
iterator getLastNonDebugInstr(bool SkipPseudoOp=true)
Returns an iterator to the last non-debug instruction in the basic block, or end().
bool isLayoutSuccessor(const MachineBasicBlock *MBB) const
Return true if the specified MBB will be emitted immediately after this block, such that if this bloc...
instr_iterator instr_end()
Instructions::const_iterator const_instr_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
iterator_range< succ_iterator > successors()
instr_iterator getFirstInstrTerminator()
Same as getFirstTerminator, but it ignores bundles and returns an instr_iterator instead.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
void setStackID(int ObjectIdx, uint8_t ID)
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
unsigned getNumObjects() const
Return the number of objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
MachineModuleInfo & getMMI() const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineInstr * CloneMachineInstr(const MachineInstr *Orig)
Create a new MachineInstr which is a copy of Orig, identical in all ways except the instruction has n...
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
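A hedged example of the fluent MachineInstrBuilder interface documented above; the wrapper emitAddImm16 is hypothetical, and AArch64::ADDXri takes a destination, a source, a 12-bit immediate and a shift operand.
  static void emitAddImm16(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                           const DebugLoc &DL, const TargetInstrInfo &TII,
                           Register DestReg, Register SrcReg, bool KillSrc) {
    BuildMI(MBB, MBBI, DL, TII.get(AArch64::ADDXri), DestReg)
        .addReg(SrcReg, getKillRegState(KillSrc))
        .addImm(16)                                             // unsigned 12-bit immediate
        .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0)); // no shift
  }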
reverse_iterator getReverse() const
Get a reverse iterator to the same node.
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:569
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
bool isCopy() const
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:346
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:950
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
Definition: MachineInstr.h:396
uint32_t mergeFlagsWith(const MachineInstr &Other) const
Return the MIFlags which represent both MachineInstrs.
unsigned getNumOperands() const
Returns the total number of operands.
Definition: MachineInstr.h:572
unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
bool isFullCopy() const
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:566
bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool registerDefIsDead(Register Reg, const TargetRegisterInfo *TRI) const
Returns true if the register is dead in this machine instruction.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
Definition: MachineInstr.h:782
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
Definition: MachineInstr.h:498
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
void addRegisterDefined(Register Reg, const TargetRegisterInfo *RegInfo=nullptr)
We have determined MI defines a register.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
uint32_t getFlags() const
Return the MI flags bitvector.
Definition: MachineInstr.h:391
int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
const std::vector< MachineJumpTableEntry > & getJumpTables() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated to IR function F if there is one, otherwise nullptr.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved...
const TargetRegisterClass * getRegClassOrNull(Register Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet.
const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
MI-level patchpoint operands.
Definition: StackMaps.h:76
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition: StackMaps.h:104
void enterBasicBlockEnd(MachineBasicBlock &MBB)
Start tracking liveness from the end of basic block MBB.
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
Register FindUnusedReg(const TargetRegisterClass *RC) const
Find an unused register of the specified register class.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:71
static constexpr bool isPhysicalRegister(unsigned Reg)
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:65
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition: Register.h:95
Represents a location in source code.
Definition: SMLoc.h:23
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
Definition: SmallPtrSet.h:412
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition: SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition: SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
Definition: SmallString.h:254
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
MI-level stackmap operands.
Definition: StackMaps.h:35
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition: StackMaps.h:50
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
int64_t getFixed() const
Returns the fixed component of the stack offset.
Definition: TypeSize.h:49
int64_t getScalable() const
Returns the scalable component of the stack offset.
Definition: TypeSize.h:52
static StackOffset get(int64_t Fixed, int64_t Scalable)
Definition: TypeSize.h:44
static StackOffset getScalable(int64_t Scalable)
Definition: TypeSize.h:43
static StackOffset getFixed(int64_t Fixed)
Definition: TypeSize.h:42
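A small sketch of how the StackOffset accessors above compose, assuming nothing beyond the listed API:
  static StackOffset exampleOffset() {
    // 32 fixed bytes plus 32 scalable bytes (an extra 32 * vscale at run time).
    StackOffset Off = StackOffset::get(/*Fixed=*/32, /*Scalable=*/32);
    assert(Off.getFixed() == 32 && Off.getScalable() == 32);
    return Off;
  }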
MI-level Statepoint operands.
Definition: StackMaps.h:158
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given statepoint should emit.
Definition: StackMaps.h:207
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< unsigned, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const
Produce the expression describing the MI loading a value into the physical register Reg.
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual CombinerObjective getCombinerObjective(unsigned Pattern) const
Return the objective of a combiner pattern.
virtual bool isFunctionSafeToSplit(const MachineFunction &MF) const
Return true if the function is a viable candidate for machine function splitting.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
TargetOptions Options
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
unsigned UnsafeFPMath
UnsafeFPMath - This flag is enabled when the -enable-unsafe-fp-math flag is specified on the command ...
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
Target - Wrapper for Target specific information.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition: TypeSize.h:342
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition: TypeSize.h:345
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition: TypeSize.h:171
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition: TypeSize.h:168
self_iterator getIterator()
Definition: ilist_node.h:132
NodeTy * getNextNode()
Get the next node, or nullptr for the list tail.
Definition: ilist_node.h:353
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:661
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
unsigned getCheckerSizeInBytes(AuthCheckMethod Method)
Returns the number of bytes added by checkAuthenticatedRegister.
const SysReg * lookupSysRegByName(StringRef)
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
static unsigned getArithShiftValue(unsigned Imm)
getArithShiftValue - get the arithmetic shift value.
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static AArch64_AM::ShiftExtendType getArithExtendType(unsigned Imm)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
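A hedged round-trip example for the logical-immediate helpers above; it additionally assumes AArch64_AM::isLogicalImmediate, the usual validity check, which is not part of this listing.
  static void logicalImmRoundTrip() {
    uint64_t Imm = 0x00FF00FF00FF00FFULL;           // repeating bit pattern, hence encodable
    if (AArch64_AM::isLogicalImmediate(Imm, /*regSize=*/64)) {
      uint64_t Enc = AArch64_AM::encodeLogicalImmediate(Imm, 64);
      assert(AArch64_AM::decodeLogicalImmediate(Enc, 64) == Imm);
      (void)Enc;
    }
  }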
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
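A sketch of calling expandMOVImm; the AArch64_IMM namespace and the ImmInsnModel element type are assumed from AArch64ExpandImm.h, and the constant is arbitrary.
  static void materializeConstant() {
    SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
    AArch64_IMM::expandMOVImm(0x123456789ABCDEF0ULL, /*BitSize=*/64, Insn);
    // Insn now holds the MOVZ/MOVN/MOVK/ORR steps that materialise the constant.
  }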
static const uint64_t InstrFlagIsWhile
static const uint64_t InstrFlagIsPTestLike
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Renamable
Register that may be renamed.
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
Reg
All possible values of the reg field in the ModR/M byte.
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
constexpr double e
Definition: MathExtras.h:31
InstrType
Represents how an instruction should be mapped by the outliner.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1722
static bool isCondBranchOpcode(int Opc)
std::pair< MachineOperand, DIExpression * > ParamLoadedValue
MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg, unsigned Reg, const StackOffset &Offset, bool LastAdjustmentWasScalable=true)
static bool isPTrueOpcode(unsigned Opc)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset, bool *OutUseUnscaledOp=nullptr, unsigned *OutUnscaledOp=nullptr, int64_t *EmittableOffset=nullptr)
Check if the Offset is a valid frame offset for MI.
static bool isIndirectBranchOpcode(int Opc)
MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg, const StackOffset &OffsetFromDefCFA)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
@ AArch64FrameOffsetIsLegal
Offset is legal.
@ AArch64FrameOffsetCanUpdate
Offset can apply, at least partly.
@ AArch64FrameOffsetCannotUpdate
Offset cannot apply.
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition: MathExtras.h:280
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:330
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition: STLExtras.h:1729
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
AArch64MachineCombinerPattern
@ MULSUBv8i16_OP2
@ FMULv4i16_indexed_OP1
@ FMLSv1i32_indexed_OP2
@ MULSUBv2i32_indexed_OP1
@ MULADDXI_OP1
@ FMLAv2i32_indexed_OP2
@ MULADDv4i16_indexed_OP2
@ FMLAv1i64_indexed_OP1
@ MULSUBv16i8_OP1
@ FMLAv8i16_indexed_OP2
@ FMULv2i32_indexed_OP1
@ MULSUBv8i16_indexed_OP2
@ FMLAv1i64_indexed_OP2
@ MULSUBv4i16_indexed_OP2
@ FMLAv1i32_indexed_OP1
@ FMLAv2i64_indexed_OP2
@ FMLSv8i16_indexed_OP1
@ MULSUBv2i32_OP1
@ FMULv4i16_indexed_OP2
@ MULSUBv4i32_indexed_OP2
@ FMULv2i64_indexed_OP2
@ MULSUBXI_OP1
@ FMLAv4i32_indexed_OP1
@ MULADDWI_OP1
@ MULADDv4i16_OP2
@ FMULv8i16_indexed_OP2
@ MULSUBv4i16_OP1
@ MULADDv4i32_OP2
@ MULADDv8i8_OP1
@ MULADDv2i32_OP2
@ MULADDv16i8_OP2
@ MULADDv8i8_OP2
@ FMLSv4i16_indexed_OP1
@ MULADDv16i8_OP1
@ FMLAv2i64_indexed_OP1
@ FMLAv1i32_indexed_OP2
@ FMLSv2i64_indexed_OP2
@ MULADDv2i32_OP1
@ MULADDv4i32_OP1
@ MULADDv2i32_indexed_OP1
@ MULSUBv16i8_OP2
@ MULADDv4i32_indexed_OP1
@ MULADDv2i32_indexed_OP2
@ FMLAv4i16_indexed_OP2
@ MULSUBv8i16_OP1
@ FMULv2i32_indexed_OP2
@ FMLSv2i32_indexed_OP2
@ FMLSv4i32_indexed_OP1
@ FMULv2i64_indexed_OP1
@ MULSUBv4i16_OP2
@ FMLSv4i16_indexed_OP2
@ FMLAv2i32_indexed_OP1
@ FMLSv2i32_indexed_OP1
@ FMLAv8i16_indexed_OP1
@ MULSUBv4i16_indexed_OP1
@ FMLSv4i32_indexed_OP2
@ MULADDv4i32_indexed_OP2
@ MULSUBv4i32_OP2
@ MULSUBv8i16_indexed_OP1
@ MULADDv8i16_OP2
@ MULSUBv2i32_indexed_OP2
@ FMULv4i32_indexed_OP2
@ FMLSv2i64_indexed_OP1
@ MULADDv4i16_OP1
@ FMLAv4i32_indexed_OP2
@ MULADDv8i16_indexed_OP1
@ FMULv4i32_indexed_OP1
@ FMLAv4i16_indexed_OP1
@ FMULv8i16_indexed_OP1
@ MULSUBv8i8_OP1
@ MULADDv8i16_OP1
@ MULSUBv4i32_indexed_OP1
@ MULSUBv4i32_OP1
@ FMLSv8i16_indexed_OP2
@ MULADDv8i16_indexed_OP2
@ MULSUBWI_OP1
@ MULSUBv2i32_OP2
@ FMLSv1i64_indexed_OP2
@ MULADDv4i16_indexed_OP1
@ MULSUBv8i8_OP2
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag=MachineInstr::NoFlags, bool SetNZCV=false, bool NeedsWinCFI=false, bool *HasWinCFI=nullptr, bool EmitCFAOffset=false, StackOffset InitialOffset={}, unsigned FrameReg=AArch64::SP)
emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg plus Offset.
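A typical, hedged call to emitFrameOffset, allocating 16 bytes of stack in a prologue (the wrapper name is illustrative):
  static void allocateSixteenBytes(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                                   const DebugLoc &DL, const TargetInstrInfo *TII) {
    // SP = SP - 16; the helper expands this into the minimal ADD/SUB sequence.
    emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                    StackOffset::getFixed(-16), TII, MachineInstr::FrameSetup);
  }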
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
std::optional< UsedNZCV > examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > *CCUseInstrs=nullptr)
CodeGenOptLevel
Code generation optimization level.
Definition: CodeGen.h:54
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
unsigned getUndefRegState(bool B)
unsigned getDefRegState(bool B)
unsigned getKillRegState(bool B)
bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition: MathExtras.h:244
DWARFExpression::Operation Op
static bool isUncondBranchOpcode(int Opc)
unsigned encodeSLEB128(int64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a SLEB128 value to an output stream.
Definition: LEB128.h:23
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition: STLExtras.h:2051
bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, StackOffset &Offset, const AArch64InstrInfo *TII)
rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the FP.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1879
static const MachineMemOperand::Flags MOSuppressPair
unsigned encodeULEB128(uint64_t Value, raw_ostream &OS, unsigned PadTo=0)
Utility function to encode a ULEB128 value to an output stream.
Definition: LEB128.h:80
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition: MathExtras.h:509
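For concreteness, the template form SignExtend64<B>(x) treats bit B-1 as the sign bit:
  static_assert(SignExtend64<8>(0x80) == -128, "bit 7 is the sign bit of an 8-bit value");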
bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI, const MachineInstr &UseMI, const TargetRegisterInfo *TRI)
Return true if there is an instruction /after/ DefMI and before UseMI which either reads or clobbers ...
static const MachineMemOperand::Flags MOStridedAccess
@ Default
The result values are uniform if and only if all operands are uniform.
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-ins for a set of MBBs until the computation converges.
Definition: LivePhysRegs.h:215
Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Description of the encoding of one expression Op.
Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare.
static const MBBSectionID ColdSectionID
MachineJumpTableEntry - One jump table in the jump table info.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
Used to describe a register and immediate addition.
An individual sequence of instructions to be replaced with a call to an outlined function.
MachineFunction * getMF() const
MachineBasicBlock::iterator begin()
MachineBasicBlock::iterator end()
The information necessary to create an outlined function for some class of candidate.
unsigned FrameConstructionID
Target-defined identifier for constructing a frame for this function.