1//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the AArch64 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64InstrInfo.h"
14#include "AArch64ExpandImm.h"
16#include "AArch64PointerAuth.h"
17#include "AArch64Subtarget.h"
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/SmallSet.h"
43#include "llvm/IR/DebugLoc.h"
44#include "llvm/IR/GlobalValue.h"
45#include "llvm/IR/Module.h"
46#include "llvm/MC/MCAsmInfo.h"
47#include "llvm/MC/MCInst.h"
49#include "llvm/MC/MCInstrDesc.h"
54#include "llvm/Support/LEB128.h"
58#include <cassert>
59#include <cstdint>
60#include <iterator>
61#include <utility>
62
63using namespace llvm;
64
65#define GET_INSTRINFO_CTOR_DTOR
66#include "AArch64GenInstrInfo.inc"
67
68static cl::opt<unsigned>
69 CBDisplacementBits("aarch64-cb-offset-bits", cl::Hidden, cl::init(9),
70 cl::desc("Restrict range of CB instructions (DEBUG)"));
71
73 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
74 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
75
77 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
78 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
79
80static cl::opt<unsigned>
81 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
82 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
83
84static cl::opt<unsigned>
85 BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
86 cl::desc("Restrict range of B instructions (DEBUG)"));
87
89 "aarch64-search-limit", cl::Hidden, cl::init(2048),
90 cl::desc("Restrict range of instructions to search for the "
91 "machine-combiner gather pattern optimization"));
92
93AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
94 : AArch64GenInstrInfo(STI, AArch64::ADJCALLSTACKDOWN,
95 AArch64::ADJCALLSTACKUP, AArch64::CATCHRET),
96 RI(STI.getTargetTriple(), STI.getHwMode()), Subtarget(STI) {}
97
98/// GetInstSize - Return the number of bytes of code the specified
99/// instruction may occupy. This returns the maximum number of bytes.
100unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
101 const MachineBasicBlock &MBB = *MI.getParent();
102 const MachineFunction *MF = MBB.getParent();
103 const Function &F = MF->getFunction();
104 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
105
106 {
107 auto Op = MI.getOpcode();
108 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
109 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
110 }
111
112 // Meta-instructions emit no code.
113 if (MI.isMetaInstruction())
114 return 0;
115
116 // FIXME: We currently only handle pseudoinstructions that don't get expanded
117 // before the assembly printer.
118 unsigned NumBytes = 0;
119 const MCInstrDesc &Desc = MI.getDesc();
120
121 if (!MI.isBundle() && isTailCallReturnInst(MI)) {
122 NumBytes = Desc.getSize() ? Desc.getSize() : 4;
123
124 const auto *MFI = MF->getInfo<AArch64FunctionInfo>();
125 if (!MFI->shouldSignReturnAddress(MF))
126 return NumBytes;
127
128 const auto &STI = MF->getSubtarget<AArch64Subtarget>();
129 auto Method = STI.getAuthenticatedLRCheckMethod(*MF);
130 NumBytes += AArch64PAuth::getCheckerSizeInBytes(Method);
131 return NumBytes;
132 }
133
134 // Size should preferably be set in
135 // llvm/lib/Target/AArch64/AArch64InstrInfo.td (default case).
136 // Specific cases below handle instructions of variable size.
137 switch (Desc.getOpcode()) {
138 default:
139 if (Desc.getSize())
140 return Desc.getSize();
141
142 // Anything not explicitly designated otherwise (i.e. pseudo-instructions
143 // with fixed constant size but not specified in .td file) is a normal
144 // 4-byte insn.
145 NumBytes = 4;
146 break;
147 case TargetOpcode::STACKMAP:
148 // The upper bound for a stackmap intrinsic is the full length of its shadow
149 NumBytes = StackMapOpers(&MI).getNumPatchBytes();
150 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
151 break;
152 case TargetOpcode::PATCHPOINT:
153 // The size of the patchpoint intrinsic is the number of bytes requested
154 NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
155 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
156 break;
157 case TargetOpcode::STATEPOINT:
158 NumBytes = StatepointOpers(&MI).getNumPatchBytes();
159 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
160 // No patch bytes means a normal call inst is emitted
161 if (NumBytes == 0)
162 NumBytes = 4;
163 break;
164 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
165 // If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
166 // instructions are expanded to the specified number of NOPs. Otherwise,
167 // they are expanded to 36-byte XRay sleds.
168 NumBytes =
169 F.getFnAttributeAsParsedInteger("patchable-function-entry", 9) * 4;
170 break;
171 case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
172 case TargetOpcode::PATCHABLE_TAIL_CALL:
173 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
174 // An XRay sled can be 4 bytes of alignment plus a 32-byte block.
175 NumBytes = 36;
176 break;
177 case TargetOpcode::PATCHABLE_EVENT_CALL:
178 // EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
179 NumBytes = 24;
180 break;
181
182 case AArch64::SPACE:
183 NumBytes = MI.getOperand(1).getImm();
184 break;
185 case TargetOpcode::BUNDLE:
186 NumBytes = getInstBundleLength(MI);
187 break;
188 }
189
190 return NumBytes;
191}
192
193unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
194 unsigned Size = 0;
195 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
196 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
197 while (++I != E && I->isInsideBundle()) {
198 assert(!I->isBundle() && "No nested bundle!");
199 Size += getInstSizeInBytes(*I);
200 }
201 return Size;
202}
203
204static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
205 SmallVectorImpl<MachineOperand> &Cond) {
206 // Block ends with fall-through condbranch.
207 switch (LastInst->getOpcode()) {
208 default:
209 llvm_unreachable("Unknown branch instruction?");
210 case AArch64::Bcc:
211 Target = LastInst->getOperand(1).getMBB();
212 Cond.push_back(LastInst->getOperand(0));
213 break;
214 case AArch64::CBZW:
215 case AArch64::CBZX:
216 case AArch64::CBNZW:
217 case AArch64::CBNZX:
218 Target = LastInst->getOperand(1).getMBB();
219 Cond.push_back(MachineOperand::CreateImm(-1));
220 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
221 Cond.push_back(LastInst->getOperand(0));
222 break;
223 case AArch64::TBZW:
224 case AArch64::TBZX:
225 case AArch64::TBNZW:
226 case AArch64::TBNZX:
227 Target = LastInst->getOperand(2).getMBB();
228 Cond.push_back(MachineOperand::CreateImm(-1));
229 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
230 Cond.push_back(LastInst->getOperand(0));
231 Cond.push_back(LastInst->getOperand(1));
232 break;
233 case AArch64::CBWPri:
234 case AArch64::CBXPri:
235 case AArch64::CBWPrr:
236 case AArch64::CBXPrr:
237 Target = LastInst->getOperand(3).getMBB();
238 Cond.push_back(MachineOperand::CreateImm(-1));
239 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
240 Cond.push_back(LastInst->getOperand(0));
241 Cond.push_back(LastInst->getOperand(1));
242 Cond.push_back(LastInst->getOperand(2));
243 break;
244 }
245}
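//
// As a quick, non-exhaustive reference, the Cond encodings produced above are
// the ones consumed by reverseBranchCondition and instantiateCondBranch below:
//   b.eq  <bb>                 -> Cond = { <EQ> }
//   cbnz  w8, <bb>             -> Cond = { -1, CBNZW, w8 }
//   tbz   x3, #7, <bb>         -> Cond = { -1, TBZX, x3, #7 }
//   CBWPrr <cc>, w0, w1, <bb>  -> Cond = { -1, CBWPrr, <cc>, w0, w1 }
// with Target pointing at <bb> in each case.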
246
247static unsigned getBranchDisplacementBits(unsigned Opc) {
248 switch (Opc) {
249 default:
250 llvm_unreachable("unexpected opcode!");
251 case AArch64::B:
252 return BDisplacementBits;
253 case AArch64::TBNZW:
254 case AArch64::TBZW:
255 case AArch64::TBNZX:
256 case AArch64::TBZX:
257 return TBZDisplacementBits;
258 case AArch64::CBNZW:
259 case AArch64::CBZW:
260 case AArch64::CBNZX:
261 case AArch64::CBZX:
262 return CBZDisplacementBits;
263 case AArch64::Bcc:
264 return BCCDisplacementBits;
265 case AArch64::CBWPri:
266 case AArch64::CBXPri:
267 case AArch64::CBWPrr:
268 case AArch64::CBXPrr:
269 return CBDisplacementBits;
270 }
271}
272
273bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
274 int64_t BrOffset) const {
275 unsigned Bits = getBranchDisplacementBits(BranchOp);
276 assert(Bits >= 3 && "max branch displacement must be enough to jump "
277 "over conditional branch expansion");
278 return isIntN(Bits, BrOffset / 4);
279}
280
281MachineBasicBlock *
282AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
283 switch (MI.getOpcode()) {
284 default:
285 llvm_unreachable("unexpected opcode!");
286 case AArch64::B:
287 return MI.getOperand(0).getMBB();
288 case AArch64::TBZW:
289 case AArch64::TBNZW:
290 case AArch64::TBZX:
291 case AArch64::TBNZX:
292 return MI.getOperand(2).getMBB();
293 case AArch64::CBZW:
294 case AArch64::CBNZW:
295 case AArch64::CBZX:
296 case AArch64::CBNZX:
297 case AArch64::Bcc:
298 return MI.getOperand(1).getMBB();
299 case AArch64::CBWPri:
300 case AArch64::CBXPri:
301 case AArch64::CBWPrr:
302 case AArch64::CBXPrr:
303 return MI.getOperand(3).getMBB();
304 }
305}
306
307void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
308 MachineBasicBlock &NewDestBB,
309 MachineBasicBlock &RestoreBB,
310 const DebugLoc &DL,
311 int64_t BrOffset,
312 RegScavenger *RS) const {
313 assert(RS && "RegScavenger required for long branching");
314 assert(MBB.empty() &&
315 "new block should be inserted for expanding unconditional branch");
316 assert(MBB.pred_size() == 1);
317 assert(RestoreBB.empty() &&
318 "restore block should be inserted for restoring clobbered registers");
319
320 auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
321 // Offsets outside of the signed 33-bit range are not supported for ADRP +
322 // ADD.
323 if (!isInt<33>(BrOffset))
325 "Branch offsets outside of the signed 33-bit range not supported");
326
327 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg)
328 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGE);
329 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg)
330 .addReg(Reg)
331 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
332 .addImm(0);
333 BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg);
334 };
335
336 RS->enterBasicBlockEnd(MBB);
337 // If X16 is unused, we can rely on the linker to insert a range extension
338 // thunk if NewDestBB is out of range of a single B instruction.
339 constexpr Register Reg = AArch64::X16;
340 if (!RS->isRegUsed(Reg)) {
341 insertUnconditionalBranch(MBB, &NewDestBB, DL);
342 RS->setRegUsed(Reg);
343 return;
344 }
345
346 // If there's a free register and it's worth inflating the code size,
347 // manually insert the indirect branch.
348 Register Scavenged = RS->FindUnusedReg(&AArch64::GPR64RegClass);
349 if (Scavenged != AArch64::NoRegister &&
350 MBB.getSectionID() == MBBSectionID::ColdSectionID) {
351 buildIndirectBranch(Scavenged, NewDestBB);
352 RS->setRegUsed(Scavenged);
353 return;
354 }
355
356 // Note: Spilling X16 briefly moves the stack pointer, making it incompatible
357 // with red zones.
358 AArch64FunctionInfo *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
359 if (!AFI || AFI->hasRedZone().value_or(true))
361 "Unable to insert indirect branch inside function that has red zone");
362
363 // Otherwise, spill X16 and defer range extension to the linker.
364 BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre))
365 .addReg(AArch64::SP, RegState::Define)
366 .addReg(Reg)
367 .addReg(AArch64::SP)
368 .addImm(-16);
369
370 BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB);
371
372 BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost))
373 .addReg(AArch64::SP, RegState::Define)
374 .addReg(Reg, RegState::Define)
375 .addReg(AArch64::SP)
376 .addImm(16);
377}
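//
// Roughly, the spill path above expands to:
//   str x16, [sp, #-16]!   ; save X16, temporarily moving SP (no red zone)
//   b   <RestoreBB>        ; the linker may route this through a thunk using X16
// with the caller-provided RestoreBB reloading the register before control
// reaches NewDestBB:
//   ldr x16, [sp], #16
// while the cold-section path materializes the destination directly:
//   adrp xN, <NewDestBB>
//   add  xN, xN, :lo12:<NewDestBB>
//   br   xN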
378
379// Branch analysis.
382 MachineBasicBlock *&FBB,
384 bool AllowModify) const {
385 // If the block has no terminators, it just falls into the block after it.
386 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
387 if (I == MBB.end())
388 return false;
389
390 // Skip over SpeculationBarrierEndBB terminators
391 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
392 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
393 --I;
394 }
395
396 if (!isUnpredicatedTerminator(*I))
397 return false;
398
399 // Get the last instruction in the block.
400 MachineInstr *LastInst = &*I;
401
402 // If there is only one terminator instruction, process it.
403 unsigned LastOpc = LastInst->getOpcode();
404 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
405 if (isUncondBranchOpcode(LastOpc)) {
406 TBB = LastInst->getOperand(0).getMBB();
407 return false;
408 }
409 if (isCondBranchOpcode(LastOpc)) {
410 // Block ends with fall-through condbranch.
411 parseCondBranch(LastInst, TBB, Cond);
412 return false;
413 }
414 return true; // Can't handle indirect branch.
415 }
416
417 // Get the instruction before it if it is a terminator.
418 MachineInstr *SecondLastInst = &*I;
419 unsigned SecondLastOpc = SecondLastInst->getOpcode();
420
421 // If AllowModify is true and the block ends with two or more unconditional
422 // branches, delete all but the first unconditional branch.
423 if (AllowModify && isUncondBranchOpcode(LastOpc)) {
424 while (isUncondBranchOpcode(SecondLastOpc)) {
425 LastInst->eraseFromParent();
426 LastInst = SecondLastInst;
427 LastOpc = LastInst->getOpcode();
428 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
429 // Return now; the only terminator is an unconditional branch.
430 TBB = LastInst->getOperand(0).getMBB();
431 return false;
432 }
433 SecondLastInst = &*I;
434 SecondLastOpc = SecondLastInst->getOpcode();
435 }
436 }
437
438 // If we're allowed to modify and the block ends in an unconditional branch
439 // which could simply fall through, remove the branch. (Note: This case only
440 // matters when we can't understand the whole sequence, otherwise it's also
441 // handled by BranchFolding.cpp.)
442 if (AllowModify && isUncondBranchOpcode(LastOpc) &&
443 MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
444 LastInst->eraseFromParent();
445 LastInst = SecondLastInst;
446 LastOpc = LastInst->getOpcode();
447 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
448 assert(!isUncondBranchOpcode(LastOpc) &&
449 "unreachable unconditional branches removed above");
450
451 if (isCondBranchOpcode(LastOpc)) {
452 // Block ends with fall-through condbranch.
453 parseCondBranch(LastInst, TBB, Cond);
454 return false;
455 }
456 return true; // Can't handle indirect branch.
457 }
458 SecondLastInst = &*I;
459 SecondLastOpc = SecondLastInst->getOpcode();
460 }
461
462 // If there are three terminators, we don't know what sort of block this is.
463 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
464 return true;
465
466 // If the block ends with a B and a Bcc, handle it.
467 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
468 parseCondBranch(SecondLastInst, TBB, Cond);
469 FBB = LastInst->getOperand(0).getMBB();
470 return false;
471 }
472
473 // If the block ends with two unconditional branches, handle it. The second
474 // one is not executed, so remove it.
475 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
476 TBB = SecondLastInst->getOperand(0).getMBB();
477 I = LastInst;
478 if (AllowModify)
479 I->eraseFromParent();
480 return false;
481 }
482
483 // ...likewise if it ends with an indirect branch followed by an unconditional
484 // branch.
485 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
486 I = LastInst;
487 if (AllowModify)
488 I->eraseFromParent();
489 return true;
490 }
491
492 // Otherwise, can't handle this.
493 return true;
494}
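//
// For example, for a block ending in
//   b.lt %bb.1
//   b    %bb.2
// analyzeBranch returns false with TBB = %bb.1, FBB = %bb.2, Cond = { <LT> },
// and for a block ending in a lone "cbz x0, %bb.3" it returns false with
// TBB = %bb.3, FBB = nullptr (fallthrough) and Cond = { -1, CBZX, x0 }.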
495
497 MachineBranchPredicate &MBP,
498 bool AllowModify) const {
499 // For the moment, handle only a block which ends with a cb(n)zx followed by
500 // a fallthrough. Why this? Because it is a common form.
501 // TODO: Should we handle b.cc?
502
503 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
504 if (I == MBB.end())
505 return true;
506
507 // Skip over SpeculationBarrierEndBB terminators
508 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
509 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
510 --I;
511 }
512
513 if (!isUnpredicatedTerminator(*I))
514 return true;
515
516 // Get the last instruction in the block.
517 MachineInstr *LastInst = &*I;
518 unsigned LastOpc = LastInst->getOpcode();
519 if (!isCondBranchOpcode(LastOpc))
520 return true;
521
522 switch (LastOpc) {
523 default:
524 return true;
525 case AArch64::CBZW:
526 case AArch64::CBZX:
527 case AArch64::CBNZW:
528 case AArch64::CBNZX:
529 break;
530 }
531
532 MBP.TrueDest = LastInst->getOperand(1).getMBB();
533 assert(MBP.TrueDest && "expected!");
534 MBP.FalseDest = MBB.getNextNode();
535
536 MBP.ConditionDef = nullptr;
537 MBP.SingleUseCondition = false;
538
539 MBP.LHS = LastInst->getOperand(0);
540 MBP.RHS = MachineOperand::CreateImm(0);
541 MBP.Predicate = (LastOpc == AArch64::CBNZX || LastOpc == AArch64::CBNZW)
542 ? MachineBranchPredicate::PRED_NE
543 : MachineBranchPredicate::PRED_EQ;
544 return false;
545}
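//
// For example, a block ending in "cbnz w8, %bb.2" that falls through to %bb.1
// yields MBP.LHS = w8, MBP.RHS = #0, MBP.Predicate = PRED_NE,
// MBP.TrueDest = %bb.2 and MBP.FalseDest = %bb.1.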
546
547bool AArch64InstrInfo::reverseBranchCondition(
548 SmallVectorImpl<MachineOperand> &Cond) const {
549 if (Cond[0].getImm() != -1) {
550 // Regular Bcc
551 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
552 Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
553 } else {
554 // Folded compare-and-branch
555 switch (Cond[1].getImm()) {
556 default:
557 llvm_unreachable("Unknown conditional branch!");
558 case AArch64::CBZW:
559 Cond[1].setImm(AArch64::CBNZW);
560 break;
561 case AArch64::CBNZW:
562 Cond[1].setImm(AArch64::CBZW);
563 break;
564 case AArch64::CBZX:
565 Cond[1].setImm(AArch64::CBNZX);
566 break;
567 case AArch64::CBNZX:
568 Cond[1].setImm(AArch64::CBZX);
569 break;
570 case AArch64::TBZW:
571 Cond[1].setImm(AArch64::TBNZW);
572 break;
573 case AArch64::TBNZW:
574 Cond[1].setImm(AArch64::TBZW);
575 break;
576 case AArch64::TBZX:
577 Cond[1].setImm(AArch64::TBNZX);
578 break;
579 case AArch64::TBNZX:
580 Cond[1].setImm(AArch64::TBZX);
581 break;
582
583 // Cond is { -1, Opcode, CC, Op0, Op1 }
584 case AArch64::CBWPri:
585 case AArch64::CBXPri:
586 case AArch64::CBWPrr:
587 case AArch64::CBXPrr: {
588 // Pseudos using standard 4-bit Arm condition codes
589 AArch64CC::CondCode CC =
590 static_cast<AArch64CC::CondCode>(Cond[2].getImm());
591 Cond[2].setImm(AArch64CC::getInvertedCondCode(CC));
592 }
593 }
594 }
595
596 return false;
597}
598
599unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
600 int *BytesRemoved) const {
601 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
602 if (I == MBB.end())
603 return 0;
604
605 if (!isUncondBranchOpcode(I->getOpcode()) &&
606 !isCondBranchOpcode(I->getOpcode()))
607 return 0;
608
609 // Remove the branch.
610 I->eraseFromParent();
611
612 I = MBB.end();
613
614 if (I == MBB.begin()) {
615 if (BytesRemoved)
616 *BytesRemoved = 4;
617 return 1;
618 }
619 --I;
620 if (!isCondBranchOpcode(I->getOpcode())) {
621 if (BytesRemoved)
622 *BytesRemoved = 4;
623 return 1;
624 }
625
626 // Remove the branch.
627 I->eraseFromParent();
628 if (BytesRemoved)
629 *BytesRemoved = 8;
630
631 return 2;
632}
633
634void AArch64InstrInfo::instantiateCondBranch(
635 MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
636 ArrayRef<MachineOperand> Cond) const {
637 if (Cond[0].getImm() != -1) {
638 // Regular Bcc
639 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
640 } else {
641 // Folded compare-and-branch
642 // Note that we use add() instead of addReg() to keep the operand flags.
643
644 // cbz, cbnz
645 const MachineInstrBuilder MIB =
646 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
647
648 // tbz/tbnz
649 if (Cond.size() > 3)
650 MIB.add(Cond[3]);
651
652 // cb
653 if (Cond.size() > 4)
654 MIB.add(Cond[4]);
655
656 MIB.addMBB(TBB);
657 }
658}
659
660unsigned AArch64InstrInfo::insertBranch(
661 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
662 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
663 // Shouldn't be a fall through.
664 assert(TBB && "insertBranch must not be told to insert a fallthrough");
665
666 if (!FBB) {
667 if (Cond.empty()) // Unconditional branch?
668 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
669 else
670 instantiateCondBranch(MBB, DL, TBB, Cond);
671
672 if (BytesAdded)
673 *BytesAdded = 4;
674
675 return 1;
676 }
677
678 // Two-way conditional branch.
679 instantiateCondBranch(MBB, DL, TBB, Cond);
680 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
681
682 if (BytesAdded)
683 *BytesAdded = 8;
684
685 return 2;
686}
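//
// For example, re-materializing the two-way branch from the analyzeBranch
// example above emits "b.lt TBB" followed by "b FBB" and reports
// *BytesAdded = 8; a single conditional or unconditional branch reports 4.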
687
688// Find the original register that VReg is copied from.
689static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
690 while (Register::isVirtualRegister(VReg)) {
691 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
692 if (!DefMI->isFullCopy())
693 return VReg;
694 VReg = DefMI->getOperand(1).getReg();
695 }
696 return VReg;
697}
698
699// Determine if VReg is defined by an instruction that can be folded into a
700// csel instruction. If so, return the folded opcode, and the replacement
701// register.
702static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
703 unsigned *NewVReg = nullptr) {
704 VReg = removeCopies(MRI, VReg);
705 if (!Register::isVirtualRegister(VReg))
706 return 0;
707
708 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
709 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
710 unsigned Opc = 0;
711 unsigned SrcOpNum = 0;
712 switch (DefMI->getOpcode()) {
713 case AArch64::ADDSXri:
714 case AArch64::ADDSWri:
715 // if NZCV is used, do not fold.
716 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
717 true) == -1)
718 return 0;
719 // fall-through to ADDXri and ADDWri.
720 [[fallthrough]];
721 case AArch64::ADDXri:
722 case AArch64::ADDWri:
723 // add x, 1 -> csinc.
724 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
725 DefMI->getOperand(3).getImm() != 0)
726 return 0;
727 SrcOpNum = 1;
728 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
729 break;
730
731 case AArch64::ORNXrr:
732 case AArch64::ORNWrr: {
733 // not x -> csinv, represented as orn dst, xzr, src.
734 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
735 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
736 return 0;
737 SrcOpNum = 2;
738 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
739 break;
740 }
741
742 case AArch64::SUBSXrr:
743 case AArch64::SUBSWrr:
744 // if NZCV is used, do not fold.
745 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
746 true) == -1)
747 return 0;
748 // fall-through to SUBXrr and SUBWrr.
749 [[fallthrough]];
750 case AArch64::SUBXrr:
751 case AArch64::SUBWrr: {
752 // neg x -> csneg, represented as sub dst, xzr, src.
753 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
754 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
755 return 0;
756 SrcOpNum = 2;
757 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
758 break;
759 }
760 default:
761 return 0;
762 }
763 assert(Opc && SrcOpNum && "Missing parameters");
764
765 if (NewVReg)
766 *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
767 return Opc;
768}
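//
// For example, if the true operand of a select is defined by
//   %1:gpr32 = ADDWri %2, 1, 0
// canFoldIntoCSel returns CSINCWr with *NewVReg = %2, so the +1 can be done by
// the csinc itself (Rd = Rn when the condition holds, otherwise Rm + 1) and no
// separate add is needed.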
769
770bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
771 ArrayRef<MachineOperand> Cond,
772 Register DstReg, Register TrueReg,
773 Register FalseReg, int &CondCycles,
774 int &TrueCycles,
775 int &FalseCycles) const {
776 // Check register classes.
777 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
778 const TargetRegisterClass *RC =
779 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
780 if (!RC)
781 return false;
782
783 // Also need to check the dest regclass, in case we're trying to optimize
784 // something like:
785 // %1(gpr) = PHI %2(fpr), bb1, %3(fpr), bb2
786 if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
787 return false;
788
789 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
790 unsigned ExtraCondLat = Cond.size() != 1;
791
792 // GPRs are handled by csel.
793 // FIXME: Fold in x+1, -x, and ~x when applicable.
794 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
795 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
796 // Single-cycle csel, csinc, csinv, and csneg.
797 CondCycles = 1 + ExtraCondLat;
798 TrueCycles = FalseCycles = 1;
799 if (canFoldIntoCSel(MRI, TrueReg))
800 TrueCycles = 0;
801 else if (canFoldIntoCSel(MRI, FalseReg))
802 FalseCycles = 0;
803 return true;
804 }
805
806 // Scalar floating point is handled by fcsel.
807 // FIXME: Form fabs, fmin, and fmax when applicable.
808 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
809 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
810 CondCycles = 5 + ExtraCondLat;
811 TrueCycles = FalseCycles = 2;
812 return true;
813 }
814
815 // Can't do vectors.
816 return false;
817}
818
821 const DebugLoc &DL, Register DstReg,
823 Register TrueReg, Register FalseReg) const {
824 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
825
826 // Parse the condition code, see parseCondBranch() above.
827 AArch64CC::CondCode CC;
828 switch (Cond.size()) {
829 default:
830 llvm_unreachable("Unknown condition opcode in Cond");
831 case 1: // b.cc
832 CC = AArch64CC::CondCode(Cond[0].getImm());
833 break;
834 case 3: { // cbz/cbnz
835 // We must insert a compare against 0.
836 bool Is64Bit;
837 switch (Cond[1].getImm()) {
838 default:
839 llvm_unreachable("Unknown branch opcode in Cond");
840 case AArch64::CBZW:
841 Is64Bit = false;
842 CC = AArch64CC::EQ;
843 break;
844 case AArch64::CBZX:
845 Is64Bit = true;
846 CC = AArch64CC::EQ;
847 break;
848 case AArch64::CBNZW:
849 Is64Bit = false;
850 CC = AArch64CC::NE;
851 break;
852 case AArch64::CBNZX:
853 Is64Bit = true;
854 CC = AArch64CC::NE;
855 break;
856 }
857 Register SrcReg = Cond[2].getReg();
858 if (Is64Bit) {
859 // cmp reg, #0 is actually subs xzr, reg, #0.
860 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
861 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
862 .addReg(SrcReg)
863 .addImm(0)
864 .addImm(0);
865 } else {
866 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
867 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
868 .addReg(SrcReg)
869 .addImm(0)
870 .addImm(0);
871 }
872 break;
873 }
874 case 4: { // tbz/tbnz
875 // We must insert a tst instruction.
876 switch (Cond[1].getImm()) {
877 default:
878 llvm_unreachable("Unknown branch opcode in Cond");
879 case AArch64::TBZW:
880 case AArch64::TBZX:
881 CC = AArch64CC::EQ;
882 break;
883 case AArch64::TBNZW:
884 case AArch64::TBNZX:
885 CC = AArch64CC::NE;
886 break;
887 }
888 // tst reg, #(1 << foo) is actually ands wzr/xzr, reg, #(1 << foo).
889 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
890 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
891 .addReg(Cond[2].getReg())
892 .addImm(
893 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
894 else
895 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
896 .addReg(Cond[2].getReg())
897 .addImm(
898 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
899 break;
900 }
901 case 5: { // cb
902 // We must insert a cmp, that is a subs
903 // 0 1 2 3 4
904 // Cond is { -1, Opcode, CC, Op0, Op1 }
905 unsigned SUBSOpC, SUBSDestReg;
906 bool IsImm = false;
907 CC = static_cast<AArch64CC::CondCode>(Cond[2].getImm());
908 switch (Cond[1].getImm()) {
909 default:
910 llvm_unreachable("Unknown branch opcode in Cond");
911 case AArch64::CBWPri:
912 SUBSOpC = AArch64::SUBSWri;
913 SUBSDestReg = AArch64::WZR;
914 IsImm = true;
915 break;
916 case AArch64::CBXPri:
917 SUBSOpC = AArch64::SUBSXri;
918 SUBSDestReg = AArch64::XZR;
919 IsImm = true;
920 break;
921 case AArch64::CBWPrr:
922 SUBSOpC = AArch64::SUBSWrr;
923 SUBSDestReg = AArch64::WZR;
924 IsImm = false;
925 break;
926 case AArch64::CBXPrr:
927 SUBSOpC = AArch64::SUBSXrr;
928 SUBSDestReg = AArch64::XZR;
929 IsImm = false;
930 break;
931 }
932
933 if (IsImm)
934 BuildMI(MBB, I, DL, get(SUBSOpC), SUBSDestReg)
935 .addReg(Cond[3].getReg())
936 .addImm(Cond[4].getImm())
937 .addImm(0);
938 else
939 BuildMI(MBB, I, DL, get(SUBSOpC), SUBSDestReg)
940 .addReg(Cond[3].getReg())
941 .addReg(Cond[4].getReg());
942 }
943 }
944
945 unsigned Opc = 0;
946 const TargetRegisterClass *RC = nullptr;
947 bool TryFold = false;
948 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
949 RC = &AArch64::GPR64RegClass;
950 Opc = AArch64::CSELXr;
951 TryFold = true;
952 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
953 RC = &AArch64::GPR32RegClass;
954 Opc = AArch64::CSELWr;
955 TryFold = true;
956 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
957 RC = &AArch64::FPR64RegClass;
958 Opc = AArch64::FCSELDrrr;
959 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
960 RC = &AArch64::FPR32RegClass;
961 Opc = AArch64::FCSELSrrr;
962 }
963 assert(RC && "Unsupported regclass");
964
965 // Try folding simple instructions into the csel.
966 if (TryFold) {
967 unsigned NewVReg = 0;
968 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
969 if (FoldedOpc) {
970 // The folded opcodes csinc, csinv and csneg apply the operation to
971 // FalseReg, so we need to invert the condition.
972 CC = AArch64CC::getInvertedCondCode(CC);
973 TrueReg = FalseReg;
974 } else
975 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
976
977 // Fold the operation. Leave any dead instructions for DCE to clean up.
978 if (FoldedOpc) {
979 FalseReg = NewVReg;
980 Opc = FoldedOpc;
981 // This extends the live range of NewVReg.
982 MRI.clearKillFlags(NewVReg);
983 }
984 }
985
986 // Pull all virtual registers into the appropriate class.
987 MRI.constrainRegClass(TrueReg, RC);
988 MRI.constrainRegClass(FalseReg, RC);
989
990 // Insert the csel.
991 BuildMI(MBB, I, DL, get(Opc), DstReg)
992 .addReg(TrueReg)
993 .addReg(FalseReg)
994 .addImm(CC);
995}
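//
// For example, selecting between w1 and w2 on Cond = { -1, TBZW, w0, 3 }
// emits roughly:
//   ands wzr, w0, #0x8        ; materialize the bit test, CC = EQ
//   csel wDst, w1, w2, eq
// (when no csinc/csinv/csneg folding applies).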
996
997// Return true if Imm can be loaded into a register by a "cheap" sequence of
998// instructions. For now, "cheap" means at most two instructions.
999static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
1000 if (BitSize == 32)
1001 return true;
1002
1003 assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
1004 uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm());
1005 SmallVector<AArch64_IMM::ImmInsnModel, 4> Is;
1006 AArch64_IMM::expandMOVImm(Imm, BitSize, Is);
1007
1008 return Is.size() <= 2;
1009}
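//
// For example, MOVi64imm of 0x0000123456780000 expands to a MOVZ plus one MOVK
// and is therefore "cheap", whereas 0x123456789abcdef0 needs a MOVZ plus three
// MOVKs and is not.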
1010
1011// FIXME: this implementation should be micro-architecture dependent, so a
1012// micro-architecture target hook should be introduced here in future.
1013bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
1014 if (Subtarget.hasExynosCheapAsMoveHandling()) {
1015 if (isExynosCheapAsMove(MI))
1016 return true;
1017 return MI.isAsCheapAsAMove();
1018 }
1019
1020 switch (MI.getOpcode()) {
1021 default:
1022 return MI.isAsCheapAsAMove();
1023
1024 case AArch64::ADDWrs:
1025 case AArch64::ADDXrs:
1026 case AArch64::SUBWrs:
1027 case AArch64::SUBXrs:
1028 return Subtarget.hasALULSLFast() && MI.getOperand(3).getImm() <= 4;
1029
1030 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
1031 // ORRXri, it is as cheap as MOV.
1032 // Likewise if it can be expanded to MOVZ/MOVN/MOVK.
1033 case AArch64::MOVi32imm:
1034 return isCheapImmediate(MI, 32);
1035 case AArch64::MOVi64imm:
1036 return isCheapImmediate(MI, 64);
1037 }
1038}
1039
1040bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
1041 switch (MI.getOpcode()) {
1042 default:
1043 return false;
1044
1045 case AArch64::ADDWrs:
1046 case AArch64::ADDXrs:
1047 case AArch64::ADDSWrs:
1048 case AArch64::ADDSXrs: {
1049 unsigned Imm = MI.getOperand(3).getImm();
1050 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1051 if (ShiftVal == 0)
1052 return true;
1053 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
1054 }
1055
1056 case AArch64::ADDWrx:
1057 case AArch64::ADDXrx:
1058 case AArch64::ADDXrx64:
1059 case AArch64::ADDSWrx:
1060 case AArch64::ADDSXrx:
1061 case AArch64::ADDSXrx64: {
1062 unsigned Imm = MI.getOperand(3).getImm();
1063 switch (AArch64_AM::getArithExtendType(Imm)) {
1064 default:
1065 return false;
1066 case AArch64_AM::UXTB:
1067 case AArch64_AM::UXTH:
1068 case AArch64_AM::UXTW:
1069 case AArch64_AM::UXTX:
1070 return AArch64_AM::getArithShiftValue(Imm) <= 4;
1071 }
1072 }
1073
1074 case AArch64::SUBWrs:
1075 case AArch64::SUBSWrs: {
1076 unsigned Imm = MI.getOperand(3).getImm();
1077 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1078 return ShiftVal == 0 ||
1079 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
1080 }
1081
1082 case AArch64::SUBXrs:
1083 case AArch64::SUBSXrs: {
1084 unsigned Imm = MI.getOperand(3).getImm();
1085 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1086 return ShiftVal == 0 ||
1087 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
1088 }
1089
1090 case AArch64::SUBWrx:
1091 case AArch64::SUBXrx:
1092 case AArch64::SUBXrx64:
1093 case AArch64::SUBSWrx:
1094 case AArch64::SUBSXrx:
1095 case AArch64::SUBSXrx64: {
1096 unsigned Imm = MI.getOperand(3).getImm();
1097 switch (AArch64_AM::getArithExtendType(Imm)) {
1098 default:
1099 return false;
1100 case AArch64_AM::UXTB:
1101 case AArch64_AM::UXTH:
1102 case AArch64_AM::UXTW:
1103 case AArch64_AM::UXTX:
1104 return AArch64_AM::getArithShiftValue(Imm) == 0;
1105 }
1106 }
1107
1108 case AArch64::LDRBBroW:
1109 case AArch64::LDRBBroX:
1110 case AArch64::LDRBroW:
1111 case AArch64::LDRBroX:
1112 case AArch64::LDRDroW:
1113 case AArch64::LDRDroX:
1114 case AArch64::LDRHHroW:
1115 case AArch64::LDRHHroX:
1116 case AArch64::LDRHroW:
1117 case AArch64::LDRHroX:
1118 case AArch64::LDRQroW:
1119 case AArch64::LDRQroX:
1120 case AArch64::LDRSBWroW:
1121 case AArch64::LDRSBWroX:
1122 case AArch64::LDRSBXroW:
1123 case AArch64::LDRSBXroX:
1124 case AArch64::LDRSHWroW:
1125 case AArch64::LDRSHWroX:
1126 case AArch64::LDRSHXroW:
1127 case AArch64::LDRSHXroX:
1128 case AArch64::LDRSWroW:
1129 case AArch64::LDRSWroX:
1130 case AArch64::LDRSroW:
1131 case AArch64::LDRSroX:
1132 case AArch64::LDRWroW:
1133 case AArch64::LDRWroX:
1134 case AArch64::LDRXroW:
1135 case AArch64::LDRXroX:
1136 case AArch64::PRFMroW:
1137 case AArch64::PRFMroX:
1138 case AArch64::STRBBroW:
1139 case AArch64::STRBBroX:
1140 case AArch64::STRBroW:
1141 case AArch64::STRBroX:
1142 case AArch64::STRDroW:
1143 case AArch64::STRDroX:
1144 case AArch64::STRHHroW:
1145 case AArch64::STRHHroX:
1146 case AArch64::STRHroW:
1147 case AArch64::STRHroX:
1148 case AArch64::STRQroW:
1149 case AArch64::STRQroX:
1150 case AArch64::STRSroW:
1151 case AArch64::STRSroX:
1152 case AArch64::STRWroW:
1153 case AArch64::STRWroX:
1154 case AArch64::STRXroW:
1155 case AArch64::STRXroX: {
1156 unsigned IsSigned = MI.getOperand(3).getImm();
1157 return !IsSigned;
1158 }
1159 }
1160}
1161
1162bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
1163 unsigned Opc = MI.getOpcode();
1164 switch (Opc) {
1165 default:
1166 return false;
1167 case AArch64::SEH_StackAlloc:
1168 case AArch64::SEH_SaveFPLR:
1169 case AArch64::SEH_SaveFPLR_X:
1170 case AArch64::SEH_SaveReg:
1171 case AArch64::SEH_SaveReg_X:
1172 case AArch64::SEH_SaveRegP:
1173 case AArch64::SEH_SaveRegP_X:
1174 case AArch64::SEH_SaveFReg:
1175 case AArch64::SEH_SaveFReg_X:
1176 case AArch64::SEH_SaveFRegP:
1177 case AArch64::SEH_SaveFRegP_X:
1178 case AArch64::SEH_SetFP:
1179 case AArch64::SEH_AddFP:
1180 case AArch64::SEH_Nop:
1181 case AArch64::SEH_PrologEnd:
1182 case AArch64::SEH_EpilogStart:
1183 case AArch64::SEH_EpilogEnd:
1184 case AArch64::SEH_PACSignLR:
1185 case AArch64::SEH_SaveAnyRegQP:
1186 case AArch64::SEH_SaveAnyRegQPX:
1187 case AArch64::SEH_AllocZ:
1188 case AArch64::SEH_SaveZReg:
1189 case AArch64::SEH_SavePReg:
1190 return true;
1191 }
1192}
1193
1195 Register &SrcReg, Register &DstReg,
1196 unsigned &SubIdx) const {
1197 switch (MI.getOpcode()) {
1198 default:
1199 return false;
1200 case AArch64::SBFMXri: // aka sxtw
1201 case AArch64::UBFMXri: // aka uxtw
1202 // Check for the 32 -> 64 bit extension case, these instructions can do
1203 // much more.
1204 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1205 return false;
1206 // This is a signed or unsigned 32 -> 64 bit extension.
1207 SrcReg = MI.getOperand(1).getReg();
1208 DstReg = MI.getOperand(0).getReg();
1209 SubIdx = AArch64::sub_32;
1210 return true;
1211 }
1212}
1213
1215 const MachineInstr &MIa, const MachineInstr &MIb) const {
1217 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
1218 int64_t OffsetA = 0, OffsetB = 0;
1219 TypeSize WidthA(0, false), WidthB(0, false);
1220 bool OffsetAIsScalable = false, OffsetBIsScalable = false;
1221
1222 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1223 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1224
1225 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1226 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1227 return false;
1228
1229 // Retrieve the base, offset from the base and width. Width
1230 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
1231 // base are identical, and the offset of a lower memory access +
1232 // the width doesn't overlap the offset of a higher memory access,
1233 // then the memory accesses are different.
1234 // If OffsetAIsScalable and OffsetBIsScalable are both true, they
1235 // are assumed to have the same scale (vscale).
1236 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
1237 WidthA, TRI) &&
1238 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
1239 WidthB, TRI)) {
1240 if (BaseOpA->isIdenticalTo(*BaseOpB) &&
1241 OffsetAIsScalable == OffsetBIsScalable) {
1242 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1243 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1244 TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1245 if (LowWidth.isScalable() == OffsetAIsScalable &&
1246 LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
1247 return true;
1248 }
1249 }
1250 return false;
1251}
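//
// For example, "ldr x1, [x20, #8]" and "str x2, [x20, #16]" share the base
// register x20 and the lower access ends at offset 8 + 8 <= 16, so they are
// reported as disjoint; accesses with different base registers are
// conservatively treated as potentially aliasing.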
1252
1253bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1254 const MachineBasicBlock *MBB,
1255 const MachineFunction &MF) const {
1256 if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
1257 return true;
1258
1259 // Do not move an instruction that can be recognized as a branch target.
1260 if (hasBTISemantics(MI))
1261 return true;
1262
1263 switch (MI.getOpcode()) {
1264 case AArch64::HINT:
1265 // CSDB hints are scheduling barriers.
1266 if (MI.getOperand(0).getImm() == 0x14)
1267 return true;
1268 break;
1269 case AArch64::DSB:
1270 case AArch64::ISB:
1271 // DSB and ISB also are scheduling barriers.
1272 return true;
1273 case AArch64::MSRpstatesvcrImm1:
1274 // SMSTART and SMSTOP are also scheduling barriers.
1275 return true;
1276 default:;
1277 }
1278 if (isSEHInstruction(MI))
1279 return true;
1280 auto Next = std::next(MI.getIterator());
1281 return Next != MBB->end() && Next->isCFIInstruction();
1282}
1283
1284/// analyzeCompare - For a comparison instruction, return the source registers
1285/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1286/// Return true if the comparison instruction can be analyzed.
1288 Register &SrcReg2, int64_t &CmpMask,
1289 int64_t &CmpValue) const {
1290 // The first operand can be a frame index where we'd normally expect a
1291 // register.
1292 // FIXME: Pass subregisters out of analyzeCompare
1293 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1294 if (!MI.getOperand(1).isReg() || MI.getOperand(1).getSubReg())
1295 return false;
1296
1297 switch (MI.getOpcode()) {
1298 default:
1299 break;
1300 case AArch64::PTEST_PP:
1301 case AArch64::PTEST_PP_ANY:
1302 case AArch64::PTEST_PP_FIRST:
1303 SrcReg = MI.getOperand(0).getReg();
1304 SrcReg2 = MI.getOperand(1).getReg();
1305 if (MI.getOperand(2).getSubReg())
1306 return false;
1307
1308 // Not sure about the mask and value for now...
1309 CmpMask = ~0;
1310 CmpValue = 0;
1311 return true;
1312 case AArch64::SUBSWrr:
1313 case AArch64::SUBSWrs:
1314 case AArch64::SUBSWrx:
1315 case AArch64::SUBSXrr:
1316 case AArch64::SUBSXrs:
1317 case AArch64::SUBSXrx:
1318 case AArch64::ADDSWrr:
1319 case AArch64::ADDSWrs:
1320 case AArch64::ADDSWrx:
1321 case AArch64::ADDSXrr:
1322 case AArch64::ADDSXrs:
1323 case AArch64::ADDSXrx:
1324 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1325 SrcReg = MI.getOperand(1).getReg();
1326 SrcReg2 = MI.getOperand(2).getReg();
1327
1328 // FIXME: Pass subregisters out of analyzeCompare
1329 if (MI.getOperand(2).getSubReg())
1330 return false;
1331
1332 CmpMask = ~0;
1333 CmpValue = 0;
1334 return true;
1335 case AArch64::SUBSWri:
1336 case AArch64::ADDSWri:
1337 case AArch64::SUBSXri:
1338 case AArch64::ADDSXri:
1339 SrcReg = MI.getOperand(1).getReg();
1340 SrcReg2 = 0;
1341 CmpMask = ~0;
1342 CmpValue = MI.getOperand(2).getImm();
1343 return true;
1344 case AArch64::ANDSWri:
1345 case AArch64::ANDSXri:
1346 // ANDS does not use the same encoding scheme as the other xxxS
1347 // instructions.
1348 SrcReg = MI.getOperand(1).getReg();
1349 SrcReg2 = 0;
1350 CmpMask = ~0;
1351 CmpValue = AArch64_AM::decodeLogicalImmediate(
1352 MI.getOperand(2).getImm(),
1353 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
1354 return true;
1355 }
1356
1357 return false;
1358}
1359
1360static bool UpdateOperandRegClass(MachineInstr &Instr) {
1361 MachineBasicBlock *MBB = Instr.getParent();
1362 assert(MBB && "Can't get MachineBasicBlock here");
1363 MachineFunction *MF = MBB->getParent();
1364 assert(MF && "Can't get MachineFunction here");
1365 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1366 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1367 MachineRegisterInfo *MRI = &MF->getRegInfo();
1368
1369 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1370 ++OpIdx) {
1371 MachineOperand &MO = Instr.getOperand(OpIdx);
1372 const TargetRegisterClass *OpRegCstraints =
1373 Instr.getRegClassConstraint(OpIdx, TII, TRI);
1374
1375 // If there's no constraint, there's nothing to do.
1376 if (!OpRegCstraints)
1377 continue;
1378 // If the operand is a frame index, there's nothing to do here.
1379 // A frame index operand will resolve correctly during PEI.
1380 if (MO.isFI())
1381 continue;
1382
1383 assert(MO.isReg() &&
1384 "Operand has register constraints without being a register!");
1385
1386 Register Reg = MO.getReg();
1387 if (Reg.isPhysical()) {
1388 if (!OpRegCstraints->contains(Reg))
1389 return false;
1390 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1391 !MRI->constrainRegClass(Reg, OpRegCstraints))
1392 return false;
1393 }
1394
1395 return true;
1396}
1397
1398/// Return the opcode that does not set flags when possible - otherwise
1399/// return the original opcode. The caller is responsible for doing the actual
1400/// substitution and legality checking.
1401unsigned AArch64InstrInfo::convertToNonFlagSettingOpc(const MachineInstr &MI) {
1402 // Don't convert all compare instructions, because for some the zero register
1403 // encoding becomes the sp register.
1404 bool MIDefinesZeroReg = false;
1405 if (MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1406 MI.definesRegister(AArch64::XZR, /*TRI=*/nullptr))
1407 MIDefinesZeroReg = true;
1408
1409 switch (MI.getOpcode()) {
1410 default:
1411 return MI.getOpcode();
1412 case AArch64::ADDSWrr:
1413 return AArch64::ADDWrr;
1414 case AArch64::ADDSWri:
1415 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1416 case AArch64::ADDSWrs:
1417 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1418 case AArch64::ADDSWrx:
1419 return AArch64::ADDWrx;
1420 case AArch64::ADDSXrr:
1421 return AArch64::ADDXrr;
1422 case AArch64::ADDSXri:
1423 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1424 case AArch64::ADDSXrs:
1425 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1426 case AArch64::ADDSXrx:
1427 return AArch64::ADDXrx;
1428 case AArch64::SUBSWrr:
1429 return AArch64::SUBWrr;
1430 case AArch64::SUBSWri:
1431 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1432 case AArch64::SUBSWrs:
1433 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1434 case AArch64::SUBSWrx:
1435 return AArch64::SUBWrx;
1436 case AArch64::SUBSXrr:
1437 return AArch64::SUBXrr;
1438 case AArch64::SUBSXri:
1439 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1440 case AArch64::SUBSXrs:
1441 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1442 case AArch64::SUBSXrx:
1443 return AArch64::SUBXrx;
1444 }
1445}
1446
1447enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1448
1449/// True when condition flags are accessed (either by writing or reading)
1450/// on the instruction trace starting at From and ending at To.
1451///
1452/// Note: If From and To are from different blocks it's assumed the condition
1453/// flags are accessed on the path.
1454static bool areCFlagsAccessedBetweenInstrs(
1455 MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1456 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1457 // Early exit if To is at the beginning of the BB.
1458 if (To == To->getParent()->begin())
1459 return true;
1460
1461 // Check whether the instructions are in the same basic block
1462 // If not, assume the condition flags might get modified somewhere.
1463 if (To->getParent() != From->getParent())
1464 return true;
1465
1466 // From must be above To.
1467 assert(std::any_of(
1468 ++To.getReverse(), To->getParent()->rend(),
1469 [From](MachineInstr &MI) { return MI.getIterator() == From; }));
1470
1471 // We iterate backward starting at \p To until we hit \p From.
1472 for (const MachineInstr &Instr :
1474 if (((AccessToCheck & AK_Write) &&
1475 Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1476 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1477 return true;
1478 }
1479 return false;
1480}
1481
1482std::optional<unsigned>
1483AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
1484 MachineInstr *Pred,
1485 const MachineRegisterInfo *MRI) const {
1486 unsigned MaskOpcode = Mask->getOpcode();
1487 unsigned PredOpcode = Pred->getOpcode();
1488 bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
1489 bool PredIsWhileLike = isWhileOpcode(PredOpcode);
1490
1491 if (PredIsWhileLike) {
1492 // For PTEST(PG, PG), PTEST is redundant when PG is the result of a WHILEcc
1493 // instruction and the condition is "any" since WHILEcc does an implicit
1494 // PTEST(ALL, PG) check and PG is always a subset of ALL.
1495 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1496 return PredOpcode;
1497
1498 // For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
1499 // redundant since WHILE performs an implicit PTEST with an all active
1500 // mask.
1501 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1502 getElementSizeForOpcode(MaskOpcode) ==
1503 getElementSizeForOpcode(PredOpcode))
1504 return PredOpcode;
1505
1506 // For PTEST_FIRST(PTRUE_ALL, WHILE), the PTEST_FIRST is redundant since
1507 // WHILEcc performs an implicit PTEST with an all active mask, setting
1508 // the N flag as the PTEST_FIRST would.
1509 if (PTest->getOpcode() == AArch64::PTEST_PP_FIRST &&
1510 isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31)
1511 return PredOpcode;
1512
1513 return {};
1514 }
1515
1516 if (PredIsPTestLike) {
1517 // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1518 // instruction that sets the flags as PTEST would and the condition is
1519 // "any" since PG is always a subset of the governing predicate of the
1520 // ptest-like instruction.
1521 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1522 return PredOpcode;
1523
1524 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1525
1526 // If the PTEST-like instruction's governing predicate is not `Mask`, attempt
1527 // to look through a copy and try again. This is because some instructions
1528 // take a predicate whose register class is a subset of its result class.
1529 if (Mask != PTestLikeMask && PTestLikeMask->isFullCopy() &&
1530 PTestLikeMask->getOperand(1).getReg().isVirtual())
1531 PTestLikeMask =
1532 MRI->getUniqueVRegDef(PTestLikeMask->getOperand(1).getReg());
1533
1534 // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
1535 // element size matches and either the PTEST_LIKE instruction uses
1536 // the same all active mask or the condition is "any".
1537 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1538 getElementSizeForOpcode(MaskOpcode) ==
1539 getElementSizeForOpcode(PredOpcode)) {
1540 if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1541 return PredOpcode;
1542 }
1543
1544 // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
1545 // flags are set based on the same mask 'PG', but PTEST_LIKE must operate
1546 // on 8-bit predicates like the PTEST. Otherwise, for instructions like
1547 // compare that also support 16/32/64-bit predicates, the implicit PTEST
1548 // performed by the compare could consider fewer lanes for these element
1549 // sizes.
1550 //
1551 // For example, consider
1552 //
1553 // ptrue p0.b ; P0=1111-1111-1111-1111
1554 // index z0.s, #0, #1 ; Z0=<0,1,2,3>
1555 // index z1.s, #1, #1 ; Z1=<1,2,3,4>
1556 // cmphi p1.s, p0/z, z1.s, z0.s ; P1=0001-0001-0001-0001
1557 // ; ^ last active
1558 // ptest p0, p1.b ; P1=0001-0001-0001-0001
1559 // ; ^ last active
1560 //
1561 // where the compare generates a canonical all active 32-bit predicate
1562 // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
1563 // active flag, whereas the PTEST instruction with the same mask doesn't.
1564 // For PTEST_ANY this doesn't apply as the flags in this case would be
1565 // identical regardless of element size.
1566 uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
1567 if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
1568 PTest->getOpcode() == AArch64::PTEST_PP_ANY))
1569 return PredOpcode;
1570
1571 return {};
1572 }
1573
1574 // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
1575 // opcode so the PTEST becomes redundant.
1576 switch (PredOpcode) {
1577 case AArch64::AND_PPzPP:
1578 case AArch64::BIC_PPzPP:
1579 case AArch64::EOR_PPzPP:
1580 case AArch64::NAND_PPzPP:
1581 case AArch64::NOR_PPzPP:
1582 case AArch64::ORN_PPzPP:
1583 case AArch64::ORR_PPzPP:
1584 case AArch64::BRKA_PPzP:
1585 case AArch64::BRKPA_PPzPP:
1586 case AArch64::BRKB_PPzP:
1587 case AArch64::BRKPB_PPzPP:
1588 case AArch64::RDFFR_PPz: {
1589 // Check to see if our mask is the same. If not the resulting flag bits
1590 // may be different and we can't remove the ptest.
1591 auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1592 if (Mask != PredMask)
1593 return {};
1594 break;
1595 }
1596 case AArch64::BRKN_PPzP: {
1597 // BRKN uses an all active implicit mask to set flags unlike the other
1598 // flag-setting instructions.
1599 // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
1600 if ((MaskOpcode != AArch64::PTRUE_B) ||
1601 (Mask->getOperand(1).getImm() != 31))
1602 return {};
1603 break;
1604 }
1605 case AArch64::PTRUE_B:
1606 // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
1607 break;
1608 default:
1609 // Bail out if we don't recognize the input
1610 return {};
1611 }
1612
1613 return convertToFlagSettingOpc(PredOpcode);
1614}
1615
1616/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1617/// operation which could set the flags in an identical manner
1618bool AArch64InstrInfo::optimizePTestInstr(
1619 MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1620 const MachineRegisterInfo *MRI) const {
1621 auto *Mask = MRI->getUniqueVRegDef(MaskReg);
1622 auto *Pred = MRI->getUniqueVRegDef(PredReg);
1623
1624 if (Pred->isCopy() && PTest->getOpcode() == AArch64::PTEST_PP_FIRST) {
1625 // Instructions which return a multi-vector (e.g. WHILECC_x2) require copies
1626 // before the branch to extract each subregister.
1627 auto Op = Pred->getOperand(1);
1628 if (Op.isReg() && Op.getReg().isVirtual() &&
1629 Op.getSubReg() == AArch64::psub0)
1630 Pred = MRI->getUniqueVRegDef(Op.getReg());
1631 }
1632
1633 unsigned PredOpcode = Pred->getOpcode();
1634 auto NewOp = canRemovePTestInstr(PTest, Mask, Pred, MRI);
1635 if (!NewOp)
1636 return false;
1637
1638 const TargetRegisterInfo *TRI = &getRegisterInfo();
1639
1640 // If another instruction between Pred and PTest accesses flags, don't remove
1641 // the ptest or update the earlier instruction to modify them.
1642 if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
1643 return false;
1644
1645 // If we pass all the checks, it's safe to remove the PTEST and use the flags
1646 // as they are prior to PTEST. Sometimes this requires the tested PTEST
1647 // operand to be replaced with an equivalent instruction that also sets the
1648 // flags.
1649 PTest->eraseFromParent();
1650 if (*NewOp != PredOpcode) {
1651 Pred->setDesc(get(*NewOp));
1652 bool succeeded = UpdateOperandRegClass(*Pred);
1653 (void)succeeded;
1654 assert(succeeded && "Operands have incompatible register classes!");
1655 Pred->addRegisterDefined(AArch64::NZCV, TRI);
1656 }
1657
1658 // Ensure that the flags def is live.
1659 if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
1660 unsigned i = 0, e = Pred->getNumOperands();
1661 for (; i != e; ++i) {
1662 MachineOperand &MO = Pred->getOperand(i);
1663 if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
1664 MO.setIsDead(false);
1665 break;
1666 }
1667 }
1668 }
1669 return true;
1670}
1671
1672/// Try to optimize a compare instruction. A compare instruction is an
1673/// instruction which produces AArch64::NZCV. It is truly a compare
1674/// instruction only when there are no uses of its destination register.
1676///
1677/// The following steps are tried in order:
1678/// 1. Convert CmpInstr into an unconditional version.
1679/// 2. Remove CmpInstr if above there is an instruction producing a needed
1680/// condition code or an instruction which can be converted into such an
1681/// instruction.
1682/// Only comparison with zero is supported.
1684 MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
1685 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
1686 assert(CmpInstr.getParent());
1687 assert(MRI);
1688
1689 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1690 int DeadNZCVIdx =
1691 CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
1692 if (DeadNZCVIdx != -1) {
1693 if (CmpInstr.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1694 CmpInstr.definesRegister(AArch64::XZR, /*TRI=*/nullptr)) {
1695 CmpInstr.eraseFromParent();
1696 return true;
1697 }
1698 unsigned Opc = CmpInstr.getOpcode();
1699 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1700 if (NewOpc == Opc)
1701 return false;
1702 const MCInstrDesc &MCID = get(NewOpc);
1703 CmpInstr.setDesc(MCID);
1704 CmpInstr.removeOperand(DeadNZCVIdx);
1705 bool succeeded = UpdateOperandRegClass(CmpInstr);
1706 (void)succeeded;
1707 assert(succeeded && "Some operands reg class are incompatible!");
1708 return true;
1709 }
1710
1711 if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
1712 CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY ||
1713 CmpInstr.getOpcode() == AArch64::PTEST_PP_FIRST)
1714 return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
1715
1716 if (SrcReg2 != 0)
1717 return false;
1718
1719 // CmpInstr is a Compare instruction if destination register is not used.
1720 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1721 return false;
1722
1723 if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
1724 return true;
1725 return (CmpValue == 0 || CmpValue == 1) &&
1726 removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
1727}
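//
// For example, in
//   %3:gpr32 = SUBSWrr %1, %2, implicit-def dead $nzcv
// the dead NZCV def allows the SUBS to be rewritten as a plain SUBWrr, and for
//   %4:gpr32 = SUBSWri %1, 0, 0, implicit-def $nzcv   ; cmp %1, #0
// the compare can often be removed entirely when %1 is already produced by a
// flag-setting (or convertible) instruction earlier in the same block.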
1728
1729/// Get opcode of S version of Instr.
1730/// If Instr is S version its opcode is returned.
1731/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
1732/// or we are not interested in it.
1733static unsigned sForm(MachineInstr &Instr) {
1734 switch (Instr.getOpcode()) {
1735 default:
1736 return AArch64::INSTRUCTION_LIST_END;
1737
1738 case AArch64::ADDSWrr:
1739 case AArch64::ADDSWri:
1740 case AArch64::ADDSXrr:
1741 case AArch64::ADDSXri:
1742 case AArch64::SUBSWrr:
1743 case AArch64::SUBSWri:
1744 case AArch64::SUBSXrr:
1745 case AArch64::SUBSXri:
1746 return Instr.getOpcode();
1747
1748 case AArch64::ADDWrr:
1749 return AArch64::ADDSWrr;
1750 case AArch64::ADDWri:
1751 return AArch64::ADDSWri;
1752 case AArch64::ADDXrr:
1753 return AArch64::ADDSXrr;
1754 case AArch64::ADDXri:
1755 return AArch64::ADDSXri;
1756 case AArch64::ADCWr:
1757 return AArch64::ADCSWr;
1758 case AArch64::ADCXr:
1759 return AArch64::ADCSXr;
1760 case AArch64::SUBWrr:
1761 return AArch64::SUBSWrr;
1762 case AArch64::SUBWri:
1763 return AArch64::SUBSWri;
1764 case AArch64::SUBXrr:
1765 return AArch64::SUBSXrr;
1766 case AArch64::SUBXri:
1767 return AArch64::SUBSXri;
1768 case AArch64::SBCWr:
1769 return AArch64::SBCSWr;
1770 case AArch64::SBCXr:
1771 return AArch64::SBCSXr;
1772 case AArch64::ANDWri:
1773 return AArch64::ANDSWri;
1774 case AArch64::ANDXri:
1775 return AArch64::ANDSXri;
1776 }
1777}
1778
1779/// Check if AArch64::NZCV should be alive in successors of MBB.
1780static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
1781 for (auto *BB : MBB->successors())
1782 if (BB->isLiveIn(AArch64::NZCV))
1783 return true;
1784 return false;
1785}
1786
1787/// \returns The condition code operand index for \p Instr if it is a branch
1788/// or select and -1 otherwise.
1789static int
1790findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
1791 switch (Instr.getOpcode()) {
1792 default:
1793 return -1;
1794
1795 case AArch64::Bcc: {
1796 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1797 assert(Idx >= 2);
1798 return Idx - 2;
1799 }
1800
1801 case AArch64::CSINVWr:
1802 case AArch64::CSINVXr:
1803 case AArch64::CSINCWr:
1804 case AArch64::CSINCXr:
1805 case AArch64::CSELWr:
1806 case AArch64::CSELXr:
1807 case AArch64::CSNEGWr:
1808 case AArch64::CSNEGXr:
1809 case AArch64::FCSELSrrr:
1810 case AArch64::FCSELDrrr: {
1811 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1812 assert(Idx >= 1);
1813 return Idx - 1;
1814 }
1815 }
1816}
1817
1818/// Find a condition code used by the instruction.
1819/// Returns AArch64CC::Invalid if either the instruction does not use condition
1820/// codes or we don't optimize CmpInstr in the presence of such instructions.
1821static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1822 int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
1823 return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
1824 Instr.getOperand(CCIdx).getImm())
1825 : AArch64CC::Invalid;
1826}
1827
1829static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1830 UsedNZCV UsedFlags;
1831 switch (CC) {
1832 default:
1833 break;
1834
1835 case AArch64CC::EQ: // Z set
1836 case AArch64CC::NE: // Z clear
1837 UsedFlags.Z = true;
1838 break;
1839
1840 case AArch64CC::HI: // Z clear and C set
1841 case AArch64CC::LS: // Z set or C clear
1842 UsedFlags.Z = true;
1843 [[fallthrough]];
1844 case AArch64CC::HS: // C set
1845 case AArch64CC::LO: // C clear
1846 UsedFlags.C = true;
1847 break;
1848
1849 case AArch64CC::MI: // N set
1850 case AArch64CC::PL: // N clear
1851 UsedFlags.N = true;
1852 break;
1853
1854 case AArch64CC::VS: // V set
1855 case AArch64CC::VC: // V clear
1856 UsedFlags.V = true;
1857 break;
1858
1859 case AArch64CC::GT: // Z clear, N and V the same
1860 case AArch64CC::LE: // Z set, N and V differ
1861 UsedFlags.Z = true;
1862 [[fallthrough]];
1863 case AArch64CC::GE: // N and V the same
1864 case AArch64CC::LT: // N and V differ
1865 UsedFlags.N = true;
1866 UsedFlags.V = true;
1867 break;
1868 }
1869 return UsedFlags;
1870}
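// For example, getUsedNZCV(AArch64CC::HI) reports both Z and C as used, while
// getUsedNZCV(AArch64CC::EQ) reports only Z; the callers below use this to
// decide whether a flag-setting substitute really provides every flag a later
// b.cc or csel consumes.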
1871
1872/// \returns Condition flags used after \p CmpInstr in its MachineBB if the NZCV
1873/// flags are not alive in successors of the common parent of \p CmpInstr and \p MI.
1874/// \returns std::nullopt otherwise.
1875///
1876/// Collect instructions using those flags in \p CCUseInstrs if provided.
1877std::optional<UsedNZCV>
1878llvm::examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr,
1879 const TargetRegisterInfo &TRI,
1880 SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
1881 MachineBasicBlock *CmpParent = CmpInstr.getParent();
1882 if (MI.getParent() != CmpParent)
1883 return std::nullopt;
1884
1885 if (areCFlagsAliveInSuccessors(CmpParent))
1886 return std::nullopt;
1887
1888 UsedNZCV NZCVUsedAfterCmp;
1889 for (MachineInstr &Instr : instructionsWithoutDebug(
1890 std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
1891 if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
1892 AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1893 if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1894 return std::nullopt;
1895 NZCVUsedAfterCmp |= getUsedNZCV(CC);
1896 if (CCUseInstrs)
1897 CCUseInstrs->push_back(&Instr);
1898 }
1899 if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
1900 break;
1901 }
1902 return NZCVUsedAfterCmp;
1903}
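// For example, given
//   subs w8, w0, #0
//   csel w9, w1, w2, eq
//   b.mi ...
// examineCFlagsUse scanning the instructions after the subs accumulates Z from
// the csel and N from the branch, and stops at the next instruction that
// redefines NZCV (or at the end of the block).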
1904
1905static bool isADDSRegImm(unsigned Opcode) {
1906 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1907}
1908
1909static bool isSUBSRegImm(unsigned Opcode) {
1910 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1911}
1912
1913/// Check if CmpInstr can be substituted by MI.
1914///
1915/// CmpInstr can be substituted:
1916/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1917/// - and, MI and CmpInstr are from the same MachineBB
1918/// - and, condition flags are not alive in successors of the CmpInstr parent
1919/// - and, if MI opcode is the S form there must be no defs of flags between
1920/// MI and CmpInstr
1921/// or if MI opcode is not the S form there must be neither defs of flags
1922/// nor uses of flags between MI and CmpInstr.
1923/// - and, if C/V flags are not used after CmpInstr
1924/// or if N flag is used but MI produces poison value if signed overflow
1925/// occurs.
1926static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
1927 const TargetRegisterInfo &TRI) {
1928 // NOTE: this assertion guarantees that MI.getOpcode() is an add or a
1929 // subtraction that may or may not set flags.
1930 assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
1931
1932 const unsigned CmpOpcode = CmpInstr.getOpcode();
1933 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1934 return false;
1935
1936 assert((CmpInstr.getOperand(2).isImm() &&
1937 CmpInstr.getOperand(2).getImm() == 0) &&
1938 "Caller guarantees that CmpInstr compares with constant 0");
1939
1940 std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
1941 if (!NZVCUsed || NZVCUsed->C)
1942 return false;
1943
1944 // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
1945 // '%vreg = add ...' or '%vreg = sub ...'.
1946 // Condition flag V is used to indicate signed overflow.
1947 // 1) MI and CmpInstr set N and V to the same value.
1948 // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
1949 // signed overflow occurs, so CmpInstr could still be simplified away.
1950 if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap))
1951 return false;
1952
1953 AccessKind AccessToCheck = AK_Write;
1954 if (sForm(MI) != MI.getOpcode())
1955 AccessToCheck = AK_All;
1956 return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
1957}
1958
1959/// Substitute an instruction comparing to zero with another instruction
1960/// which produces needed condition flags.
1961///
1962/// Return true on success.
1963bool AArch64InstrInfo::substituteCmpToZero(
1964 MachineInstr &CmpInstr, unsigned SrcReg,
1965 const MachineRegisterInfo &MRI) const {
1966 // Get the unique definition of SrcReg.
1967 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
1968 if (!MI)
1969 return false;
1970
1971 const TargetRegisterInfo &TRI = getRegisterInfo();
1972
1973 unsigned NewOpc = sForm(*MI);
1974 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1975 return false;
1976
1977 if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
1978 return false;
1979
1980 // Update the instruction to set NZCV.
1981 MI->setDesc(get(NewOpc));
1982 CmpInstr.eraseFromParent();
1983 bool succeeded = UpdateOperandRegClass(*MI);
1984 (void)succeeded;
1985 assert(succeeded && "Some operands reg class are incompatible!");
1986 MI->addRegisterDefined(AArch64::NZCV, &TRI);
1987 return true;
1988}
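// Note that when the V flag is consumed after the compare (e.g. a following
// b.ge or b.lt), the substitution above is only performed if the add/sub
// carries the no-signed-wrap flag, per canInstrSubstituteCmpInstr.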
1989
1990/// \returns True if \p CmpInstr can be removed.
1991///
1992/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
1993/// codes used in \p CCUseInstrs must be inverted.
1994static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
1995 int CmpValue, const TargetRegisterInfo &TRI,
1996 SmallVectorImpl<MachineInstr *> &CCUseInstrs,
1997 bool &IsInvertCC) {
1998 assert((CmpValue == 0 || CmpValue == 1) &&
1999 "Only comparisons to 0 or 1 considered for removal!");
2000
2001 // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
2002 unsigned MIOpc = MI.getOpcode();
2003 if (MIOpc == AArch64::CSINCWr) {
2004 if (MI.getOperand(1).getReg() != AArch64::WZR ||
2005 MI.getOperand(2).getReg() != AArch64::WZR)
2006 return false;
2007 } else if (MIOpc == AArch64::CSINCXr) {
2008 if (MI.getOperand(1).getReg() != AArch64::XZR ||
2009 MI.getOperand(2).getReg() != AArch64::XZR)
2010 return false;
2011 } else {
2012 return false;
2013 }
2014 AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
2015 if (MICC == AArch64CC::Invalid)
2016 return false;
2017
2018 // NZCV needs to be defined
2019 if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) != -1)
2020 return false;
2021
2022 // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
2023 const unsigned CmpOpcode = CmpInstr.getOpcode();
2024 bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
2025 if (CmpValue && !IsSubsRegImm)
2026 return false;
2027 if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
2028 return false;
2029
2030 // MI conditions allowed: eq, ne, mi, pl
2031 UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
2032 if (MIUsedNZCV.C || MIUsedNZCV.V)
2033 return false;
2034
2035 std::optional<UsedNZCV> NZCVUsedAfterCmp =
2036 examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
2037 // Condition flags are not used in CmpInstr basic block successors, and only
2038 // the Z or N flags are allowed to be used after CmpInstr within its basic block.
2039 if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
2040 return false;
2041 // Z or N flag used after CmpInstr must correspond to the flag used in MI
2042 if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
2043 (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
2044 return false;
2045 // If CmpInstr is comparison to zero MI conditions are limited to eq, ne
2046 if (MIUsedNZCV.N && !CmpValue)
2047 return false;
2048
2049 // There must be no defs of flags between MI and CmpInstr
2050 if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
2051 return false;
2052
2053 // Condition code is inverted in the following cases:
2054 // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
2055 // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
2056 IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
2057 (!CmpValue && MICC == AArch64CC::NE);
2058 return true;
2059}
2060
2061/// Remove comparison in csinc-cmp sequence
2062///
2063/// Examples:
2064/// 1. \code
2065/// csinc w9, wzr, wzr, ne
2066/// cmp w9, #0
2067/// b.eq
2068/// \endcode
2069/// to
2070/// \code
2071/// csinc w9, wzr, wzr, ne
2072/// b.ne
2073/// \endcode
2074///
2075/// 2. \code
2076/// csinc x2, xzr, xzr, mi
2077/// cmp x2, #1
2078/// b.pl
2079/// \endcode
2080/// to
2081/// \code
2082/// csinc x2, xzr, xzr, mi
2083/// b.pl
2084/// \endcode
2085///
2086/// \param CmpInstr comparison instruction
2087/// \return True when comparison removed
2088bool AArch64InstrInfo::removeCmpToZeroOrOne(
2089 MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
2090 const MachineRegisterInfo &MRI) const {
2091 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
2092 if (!MI)
2093 return false;
2094 const TargetRegisterInfo &TRI = getRegisterInfo();
2095 SmallVector<MachineInstr *, 4> CCUseInstrs;
2096 bool IsInvertCC = false;
2097 if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
2098 IsInvertCC))
2099 return false;
2100 // Make transformation
2101 CmpInstr.eraseFromParent();
2102 if (IsInvertCC) {
2103 // Invert condition codes in CmpInstr CC users
2104 for (MachineInstr *CCUseInstr : CCUseInstrs) {
2105 int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
2106 assert(Idx >= 0 && "Unexpected instruction using CC.");
2107 MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
2108 AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
2109 static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
2110 CCOperand.setImm(CCUse);
2111 }
2112 }
2113 return true;
2114}
2115
2116bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
2117 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
2118 MI.getOpcode() != AArch64::CATCHRET)
2119 return false;
2120
2121 MachineBasicBlock &MBB = *MI.getParent();
2122 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
2123 auto TRI = Subtarget.getRegisterInfo();
2124 DebugLoc DL = MI.getDebugLoc();
2125
2126 if (MI.getOpcode() == AArch64::CATCHRET) {
2127 // Skip to the first instruction before the epilog.
2128 const TargetInstrInfo *TII =
2129 MBB.getParent()->getSubtarget().getInstrInfo();
2130 MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
2131 MachineBasicBlock::iterator MBBI = MachineBasicBlock::iterator(MI);
2132 MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
2133 while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
2134 FirstEpilogSEH != MBB.begin())
2135 FirstEpilogSEH = std::prev(FirstEpilogSEH);
2136 if (FirstEpilogSEH != MBB.begin())
2137 FirstEpilogSEH = std::next(FirstEpilogSEH);
2138 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
2139 .addReg(AArch64::X0, RegState::Define)
2140 .addMBB(TargetMBB);
2141 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
2142 .addReg(AArch64::X0, RegState::Define)
2143 .addReg(AArch64::X0)
2144 .addMBB(TargetMBB)
2145 .addImm(0);
2146 TargetMBB->setMachineBlockAddressTaken();
2147 return true;
2148 }
2149
2150 Register Reg = MI.getOperand(0).getReg();
2151 Module &M = *MBB.getParent()->getFunction().getParent();
2152 if (M.getStackProtectorGuard() == "sysreg") {
2153 const AArch64SysReg::SysReg *SrcReg =
2154 AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
2155 if (!SrcReg)
2156 report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
2157
2158 // mrs xN, sysreg
2159 BuildMI(MBB, MI, DL, get(AArch64::MRS))
2160 .addDef(Reg)
2161 .addImm(SrcReg->Encoding);
2162 int Offset = M.getStackProtectorGuardOffset();
2163 if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
2164 // ldr xN, [xN, #offset]
2165 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2166 .addDef(Reg)
2167 .addUse(Reg, RegState::Kill)
2168 .addImm(Offset / 8);
2169 } else if (Offset >= -256 && Offset <= 255) {
2170 // ldur xN, [xN, #offset]
2171 BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
2172 .addDef(Reg)
2173 .addUse(Reg, RegState::Kill)
2174 .addImm(Offset);
2175 } else if (Offset >= -4095 && Offset <= 4095) {
2176 if (Offset > 0) {
2177 // add xN, xN, #offset
2178 BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
2179 .addDef(Reg)
2180 .addUse(Reg, RegState::Kill)
2181 .addImm(Offset)
2182 .addImm(0);
2183 } else {
2184 // sub xN, xN, #offset
2185 BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
2186 .addDef(Reg)
2187 .addUse(Reg, RegState::Kill)
2188 .addImm(-Offset)
2189 .addImm(0);
2190 }
2191 // ldr xN, [xN]
2192 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2193 .addDef(Reg)
2194 .addUse(Reg, RegState::Kill)
2195 .addImm(0);
2196 } else {
2197 // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
2198 // than 32760.
2199 // It might be nice to use AArch64::MOVi32imm here, which would get
2200 // expanded in PreSched2 after PostRA, but our lone scratch Reg already
2201 // contains the MRS result. findScratchNonCalleeSaveRegister() in
2202 // AArch64FrameLowering might help us find such a scratch register
2203 // though. If we failed to find a scratch register, we could emit a
2204 // stream of add instructions to build up the immediate. Or, we could try
2205 // to insert a AArch64::MOVi32imm before register allocation so that we
2206 // didn't need to scavenge for a scratch register.
2207 report_fatal_error("Unable to encode Stack Protector Guard Offset");
2208 }
2209 MBB.erase(MI);
2210 return true;
2211 }
2212
2213 const GlobalValue *GV =
2214 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
2215 const TargetMachine &TM = MBB.getParent()->getTarget();
2216 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
2217 const unsigned char MO_NC = AArch64II::MO_NC;
2218
2219 if ((OpFlags & AArch64II::MO_GOT) != 0) {
2220 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
2221 .addGlobalAddress(GV, 0, OpFlags);
2222 if (Subtarget.isTargetILP32()) {
2223 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2224 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2225 .addDef(Reg32, RegState::Dead)
2226 .addUse(Reg, RegState::Kill)
2227 .addImm(0)
2228 .addMemOperand(*MI.memoperands_begin())
2229 .addDef(Reg, RegState::Implicit);
2230 } else {
2231 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2232 .addReg(Reg, RegState::Kill)
2233 .addImm(0)
2234 .addMemOperand(*MI.memoperands_begin());
2235 }
2236 } else if (TM.getCodeModel() == CodeModel::Large) {
2237 assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
2238 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
2239 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
2240 .addImm(0);
2241 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2242 .addReg(Reg, RegState::Kill)
2243 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
2244 .addImm(16);
2245 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2246 .addReg(Reg, RegState::Kill)
2247 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
2248 .addImm(32);
2249 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2250 .addReg(Reg, RegState::Kill)
2251 .addGlobalAddress(GV, 0, AArch64II::MO_G3)
2252 .addImm(48);
2253 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2254 .addReg(Reg, RegState::Kill)
2255 .addImm(0)
2256 .addMemOperand(*MI.memoperands_begin());
2257 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2258 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
2259 .addGlobalAddress(GV, 0, OpFlags);
2260 } else {
2261 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
2262 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
2263 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
2264 if (Subtarget.isTargetILP32()) {
2265 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2266 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2267 .addDef(Reg32, RegState::Dead)
2268 .addUse(Reg, RegState::Kill)
2269 .addGlobalAddress(GV, 0, LoFlags)
2270 .addMemOperand(*MI.memoperands_begin())
2271 .addDef(Reg, RegState::Implicit);
2272 } else {
2273 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2274 .addReg(Reg, RegState::Kill)
2275 .addGlobalAddress(GV, 0, LoFlags)
2276 .addMemOperand(*MI.memoperands_begin());
2277 }
2278 }
2279
2280 MBB.erase(MI);
2281
2282 return true;
2283}
2284
2285// Return true if this instruction simply sets its single destination register
2286// to zero. This is equivalent to a register rename of the zero-register.
2287bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
2288 switch (MI.getOpcode()) {
2289 default:
2290 break;
2291 case AArch64::MOVZWi:
2292 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2293 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
2294 assert(MI.getDesc().getNumOperands() == 3 &&
2295 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2296 return true;
2297 }
2298 break;
2299 case AArch64::ANDWri: // and Rd, Rzr, #imm
2300 return MI.getOperand(1).getReg() == AArch64::WZR;
2301 case AArch64::ANDXri:
2302 return MI.getOperand(1).getReg() == AArch64::XZR;
2303 case TargetOpcode::COPY:
2304 return MI.getOperand(1).getReg() == AArch64::WZR;
2305 }
2306 return false;
2307}
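// For example, 'mov w0, #0' (MOVZWi) and 'and w0, wzr, #0xff' (ANDWri with a
// WZR source) both qualify as GPR zeroing instructions.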
2308
2309// Return true if this instruction simply renames a general register without
2310// modifying bits.
2311bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
2312 switch (MI.getOpcode()) {
2313 default:
2314 break;
2315 case TargetOpcode::COPY: {
2316 // GPR32 copies will be lowered to ORRXrs
2317 Register DstReg = MI.getOperand(0).getReg();
2318 return (AArch64::GPR32RegClass.contains(DstReg) ||
2319 AArch64::GPR64RegClass.contains(DstReg));
2320 }
2321 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2322 if (MI.getOperand(1).getReg() == AArch64::XZR) {
2323 assert(MI.getDesc().getNumOperands() == 4 &&
2324 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2325 return true;
2326 }
2327 break;
2328 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2329 if (MI.getOperand(2).getImm() == 0) {
2330 assert(MI.getDesc().getNumOperands() == 4 &&
2331 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2332 return true;
2333 }
2334 break;
2335 }
2336 return false;
2337}
2338
2339// Return true if this instruction simply renames an FP or vector register
2340// without modifying bits.
2341bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
2342 switch (MI.getOpcode()) {
2343 default:
2344 break;
2345 case TargetOpcode::COPY: {
2346 Register DstReg = MI.getOperand(0).getReg();
2347 return AArch64::FPR128RegClass.contains(DstReg);
2348 }
2349 case AArch64::ORRv16i8:
2350 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
2351 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
2352 "invalid ORRv16i8 operands");
2353 return true;
2354 }
2355 break;
2356 }
2357 return false;
2358}
2359
2360Register AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
2361 int &FrameIndex) const {
2362 switch (MI.getOpcode()) {
2363 default:
2364 break;
2365 case AArch64::LDRWui:
2366 case AArch64::LDRXui:
2367 case AArch64::LDRBui:
2368 case AArch64::LDRHui:
2369 case AArch64::LDRSui:
2370 case AArch64::LDRDui:
2371 case AArch64::LDRQui:
2372 case AArch64::LDR_PXI:
2373 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2374 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2375 FrameIndex = MI.getOperand(1).getIndex();
2376 return MI.getOperand(0).getReg();
2377 }
2378 break;
2379 }
2380
2381 return 0;
2382}
2383
2384Register AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
2385 int &FrameIndex) const {
2386 switch (MI.getOpcode()) {
2387 default:
2388 break;
2389 case AArch64::STRWui:
2390 case AArch64::STRXui:
2391 case AArch64::STRBui:
2392 case AArch64::STRHui:
2393 case AArch64::STRSui:
2394 case AArch64::STRDui:
2395 case AArch64::STRQui:
2396 case AArch64::STR_PXI:
2397 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2398 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2399 FrameIndex = MI.getOperand(1).getIndex();
2400 return MI.getOperand(0).getReg();
2401 }
2402 break;
2403 }
2404 return 0;
2405}
2406
2407/// Check all MachineMemOperands for a hint to suppress pairing.
2408bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
2409 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2410 return MMO->getFlags() & MOSuppressPair;
2411 });
2412}
2413
2414/// Set a flag on the first MachineMemOperand to suppress pairing.
2415void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
2416 if (MI.memoperands_empty())
2417 return;
2418 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
2419}
2420
2421/// Check all MachineMemOperands for a hint that the load/store is strided.
2422bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
2423 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2424 return MMO->getFlags() & MOStridedAccess;
2425 });
2426}
2427
2428bool AArch64InstrInfo::hasUnscaledLdStOffset(unsigned Opc) {
2429 switch (Opc) {
2430 default:
2431 return false;
2432 case AArch64::STURSi:
2433 case AArch64::STRSpre:
2434 case AArch64::STURDi:
2435 case AArch64::STRDpre:
2436 case AArch64::STURQi:
2437 case AArch64::STRQpre:
2438 case AArch64::STURBBi:
2439 case AArch64::STURHHi:
2440 case AArch64::STURWi:
2441 case AArch64::STRWpre:
2442 case AArch64::STURXi:
2443 case AArch64::STRXpre:
2444 case AArch64::LDURSi:
2445 case AArch64::LDRSpre:
2446 case AArch64::LDURDi:
2447 case AArch64::LDRDpre:
2448 case AArch64::LDURQi:
2449 case AArch64::LDRQpre:
2450 case AArch64::LDURWi:
2451 case AArch64::LDRWpre:
2452 case AArch64::LDURXi:
2453 case AArch64::LDRXpre:
2454 case AArch64::LDRSWpre:
2455 case AArch64::LDURSWi:
2456 case AArch64::LDURHHi:
2457 case AArch64::LDURBBi:
2458 case AArch64::LDURSBWi:
2459 case AArch64::LDURSHWi:
2460 return true;
2461 }
2462}
2463
2464std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
2465 switch (Opc) {
2466 default: return {};
2467 case AArch64::PRFMui: return AArch64::PRFUMi;
2468 case AArch64::LDRXui: return AArch64::LDURXi;
2469 case AArch64::LDRWui: return AArch64::LDURWi;
2470 case AArch64::LDRBui: return AArch64::LDURBi;
2471 case AArch64::LDRHui: return AArch64::LDURHi;
2472 case AArch64::LDRSui: return AArch64::LDURSi;
2473 case AArch64::LDRDui: return AArch64::LDURDi;
2474 case AArch64::LDRQui: return AArch64::LDURQi;
2475 case AArch64::LDRBBui: return AArch64::LDURBBi;
2476 case AArch64::LDRHHui: return AArch64::LDURHHi;
2477 case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2478 case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2479 case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2480 case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2481 case AArch64::LDRSWui: return AArch64::LDURSWi;
2482 case AArch64::STRXui: return AArch64::STURXi;
2483 case AArch64::STRWui: return AArch64::STURWi;
2484 case AArch64::STRBui: return AArch64::STURBi;
2485 case AArch64::STRHui: return AArch64::STURHi;
2486 case AArch64::STRSui: return AArch64::STURSi;
2487 case AArch64::STRDui: return AArch64::STURDi;
2488 case AArch64::STRQui: return AArch64::STURQi;
2489 case AArch64::STRBBui: return AArch64::STURBBi;
2490 case AArch64::STRHHui: return AArch64::STURHHi;
2491 }
2492}
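// For example, getUnscaledLdSt(AArch64::LDRXui) is AArch64::LDURXi: the scaled
// unsigned-offset form 'ldr x0, [x1, #8]' and the unscaled form
// 'ldur x0, [x1, #8]' access the same memory, but LDUR accepts any signed
// 9-bit byte offset while LDR needs a non-negative multiple of the access size.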
2493
2494unsigned AArch64InstrInfo::getLoadStoreImmIdx(unsigned Opc) {
2495 switch (Opc) {
2496 default:
2497 llvm_unreachable("Unhandled Opcode in getLoadStoreImmIdx");
2498 case AArch64::ADDG:
2499 case AArch64::LDAPURBi:
2500 case AArch64::LDAPURHi:
2501 case AArch64::LDAPURi:
2502 case AArch64::LDAPURSBWi:
2503 case AArch64::LDAPURSBXi:
2504 case AArch64::LDAPURSHWi:
2505 case AArch64::LDAPURSHXi:
2506 case AArch64::LDAPURSWi:
2507 case AArch64::LDAPURXi:
2508 case AArch64::LDR_PPXI:
2509 case AArch64::LDR_PXI:
2510 case AArch64::LDR_ZXI:
2511 case AArch64::LDR_ZZXI:
2512 case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
2513 case AArch64::LDR_ZZZXI:
2514 case AArch64::LDR_ZZZZXI:
2515 case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
2516 case AArch64::LDRBBui:
2517 case AArch64::LDRBui:
2518 case AArch64::LDRDui:
2519 case AArch64::LDRHHui:
2520 case AArch64::LDRHui:
2521 case AArch64::LDRQui:
2522 case AArch64::LDRSBWui:
2523 case AArch64::LDRSBXui:
2524 case AArch64::LDRSHWui:
2525 case AArch64::LDRSHXui:
2526 case AArch64::LDRSui:
2527 case AArch64::LDRSWui:
2528 case AArch64::LDRWui:
2529 case AArch64::LDRXui:
2530 case AArch64::LDURBBi:
2531 case AArch64::LDURBi:
2532 case AArch64::LDURDi:
2533 case AArch64::LDURHHi:
2534 case AArch64::LDURHi:
2535 case AArch64::LDURQi:
2536 case AArch64::LDURSBWi:
2537 case AArch64::LDURSBXi:
2538 case AArch64::LDURSHWi:
2539 case AArch64::LDURSHXi:
2540 case AArch64::LDURSi:
2541 case AArch64::LDURSWi:
2542 case AArch64::LDURWi:
2543 case AArch64::LDURXi:
2544 case AArch64::PRFMui:
2545 case AArch64::PRFUMi:
2546 case AArch64::ST2Gi:
2547 case AArch64::STGi:
2548 case AArch64::STLURBi:
2549 case AArch64::STLURHi:
2550 case AArch64::STLURWi:
2551 case AArch64::STLURXi:
2552 case AArch64::StoreSwiftAsyncContext:
2553 case AArch64::STR_PPXI:
2554 case AArch64::STR_PXI:
2555 case AArch64::STR_ZXI:
2556 case AArch64::STR_ZZXI:
2557 case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
2558 case AArch64::STR_ZZZXI:
2559 case AArch64::STR_ZZZZXI:
2560 case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
2561 case AArch64::STRBBui:
2562 case AArch64::STRBui:
2563 case AArch64::STRDui:
2564 case AArch64::STRHHui:
2565 case AArch64::STRHui:
2566 case AArch64::STRQui:
2567 case AArch64::STRSui:
2568 case AArch64::STRWui:
2569 case AArch64::STRXui:
2570 case AArch64::STURBBi:
2571 case AArch64::STURBi:
2572 case AArch64::STURDi:
2573 case AArch64::STURHHi:
2574 case AArch64::STURHi:
2575 case AArch64::STURQi:
2576 case AArch64::STURSi:
2577 case AArch64::STURWi:
2578 case AArch64::STURXi:
2579 case AArch64::STZ2Gi:
2580 case AArch64::STZGi:
2581 case AArch64::TAGPstack:
2582 return 2;
2583 case AArch64::LD1B_D_IMM:
2584 case AArch64::LD1B_H_IMM:
2585 case AArch64::LD1B_IMM:
2586 case AArch64::LD1B_S_IMM:
2587 case AArch64::LD1D_IMM:
2588 case AArch64::LD1H_D_IMM:
2589 case AArch64::LD1H_IMM:
2590 case AArch64::LD1H_S_IMM:
2591 case AArch64::LD1RB_D_IMM:
2592 case AArch64::LD1RB_H_IMM:
2593 case AArch64::LD1RB_IMM:
2594 case AArch64::LD1RB_S_IMM:
2595 case AArch64::LD1RD_IMM:
2596 case AArch64::LD1RH_D_IMM:
2597 case AArch64::LD1RH_IMM:
2598 case AArch64::LD1RH_S_IMM:
2599 case AArch64::LD1RSB_D_IMM:
2600 case AArch64::LD1RSB_H_IMM:
2601 case AArch64::LD1RSB_S_IMM:
2602 case AArch64::LD1RSH_D_IMM:
2603 case AArch64::LD1RSH_S_IMM:
2604 case AArch64::LD1RSW_IMM:
2605 case AArch64::LD1RW_D_IMM:
2606 case AArch64::LD1RW_IMM:
2607 case AArch64::LD1SB_D_IMM:
2608 case AArch64::LD1SB_H_IMM:
2609 case AArch64::LD1SB_S_IMM:
2610 case AArch64::LD1SH_D_IMM:
2611 case AArch64::LD1SH_S_IMM:
2612 case AArch64::LD1SW_D_IMM:
2613 case AArch64::LD1W_D_IMM:
2614 case AArch64::LD1W_IMM:
2615 case AArch64::LD2B_IMM:
2616 case AArch64::LD2D_IMM:
2617 case AArch64::LD2H_IMM:
2618 case AArch64::LD2W_IMM:
2619 case AArch64::LD3B_IMM:
2620 case AArch64::LD3D_IMM:
2621 case AArch64::LD3H_IMM:
2622 case AArch64::LD3W_IMM:
2623 case AArch64::LD4B_IMM:
2624 case AArch64::LD4D_IMM:
2625 case AArch64::LD4H_IMM:
2626 case AArch64::LD4W_IMM:
2627 case AArch64::LDG:
2628 case AArch64::LDNF1B_D_IMM:
2629 case AArch64::LDNF1B_H_IMM:
2630 case AArch64::LDNF1B_IMM:
2631 case AArch64::LDNF1B_S_IMM:
2632 case AArch64::LDNF1D_IMM:
2633 case AArch64::LDNF1H_D_IMM:
2634 case AArch64::LDNF1H_IMM:
2635 case AArch64::LDNF1H_S_IMM:
2636 case AArch64::LDNF1SB_D_IMM:
2637 case AArch64::LDNF1SB_H_IMM:
2638 case AArch64::LDNF1SB_S_IMM:
2639 case AArch64::LDNF1SH_D_IMM:
2640 case AArch64::LDNF1SH_S_IMM:
2641 case AArch64::LDNF1SW_D_IMM:
2642 case AArch64::LDNF1W_D_IMM:
2643 case AArch64::LDNF1W_IMM:
2644 case AArch64::LDNPDi:
2645 case AArch64::LDNPQi:
2646 case AArch64::LDNPSi:
2647 case AArch64::LDNPWi:
2648 case AArch64::LDNPXi:
2649 case AArch64::LDNT1B_ZRI:
2650 case AArch64::LDNT1D_ZRI:
2651 case AArch64::LDNT1H_ZRI:
2652 case AArch64::LDNT1W_ZRI:
2653 case AArch64::LDPDi:
2654 case AArch64::LDPQi:
2655 case AArch64::LDPSi:
2656 case AArch64::LDPWi:
2657 case AArch64::LDPXi:
2658 case AArch64::LDRBBpost:
2659 case AArch64::LDRBBpre:
2660 case AArch64::LDRBpost:
2661 case AArch64::LDRBpre:
2662 case AArch64::LDRDpost:
2663 case AArch64::LDRDpre:
2664 case AArch64::LDRHHpost:
2665 case AArch64::LDRHHpre:
2666 case AArch64::LDRHpost:
2667 case AArch64::LDRHpre:
2668 case AArch64::LDRQpost:
2669 case AArch64::LDRQpre:
2670 case AArch64::LDRSpost:
2671 case AArch64::LDRSpre:
2672 case AArch64::LDRWpost:
2673 case AArch64::LDRWpre:
2674 case AArch64::LDRXpost:
2675 case AArch64::LDRXpre:
2676 case AArch64::ST1B_D_IMM:
2677 case AArch64::ST1B_H_IMM:
2678 case AArch64::ST1B_IMM:
2679 case AArch64::ST1B_S_IMM:
2680 case AArch64::ST1D_IMM:
2681 case AArch64::ST1H_D_IMM:
2682 case AArch64::ST1H_IMM:
2683 case AArch64::ST1H_S_IMM:
2684 case AArch64::ST1W_D_IMM:
2685 case AArch64::ST1W_IMM:
2686 case AArch64::ST2B_IMM:
2687 case AArch64::ST2D_IMM:
2688 case AArch64::ST2H_IMM:
2689 case AArch64::ST2W_IMM:
2690 case AArch64::ST3B_IMM:
2691 case AArch64::ST3D_IMM:
2692 case AArch64::ST3H_IMM:
2693 case AArch64::ST3W_IMM:
2694 case AArch64::ST4B_IMM:
2695 case AArch64::ST4D_IMM:
2696 case AArch64::ST4H_IMM:
2697 case AArch64::ST4W_IMM:
2698 case AArch64::STGPi:
2699 case AArch64::STGPreIndex:
2700 case AArch64::STZGPreIndex:
2701 case AArch64::ST2GPreIndex:
2702 case AArch64::STZ2GPreIndex:
2703 case AArch64::STGPostIndex:
2704 case AArch64::STZGPostIndex:
2705 case AArch64::ST2GPostIndex:
2706 case AArch64::STZ2GPostIndex:
2707 case AArch64::STNPDi:
2708 case AArch64::STNPQi:
2709 case AArch64::STNPSi:
2710 case AArch64::STNPWi:
2711 case AArch64::STNPXi:
2712 case AArch64::STNT1B_ZRI:
2713 case AArch64::STNT1D_ZRI:
2714 case AArch64::STNT1H_ZRI:
2715 case AArch64::STNT1W_ZRI:
2716 case AArch64::STPDi:
2717 case AArch64::STPQi:
2718 case AArch64::STPSi:
2719 case AArch64::STPWi:
2720 case AArch64::STPXi:
2721 case AArch64::STRBBpost:
2722 case AArch64::STRBBpre:
2723 case AArch64::STRBpost:
2724 case AArch64::STRBpre:
2725 case AArch64::STRDpost:
2726 case AArch64::STRDpre:
2727 case AArch64::STRHHpost:
2728 case AArch64::STRHHpre:
2729 case AArch64::STRHpost:
2730 case AArch64::STRHpre:
2731 case AArch64::STRQpost:
2732 case AArch64::STRQpre:
2733 case AArch64::STRSpost:
2734 case AArch64::STRSpre:
2735 case AArch64::STRWpost:
2736 case AArch64::STRWpre:
2737 case AArch64::STRXpost:
2738 case AArch64::STRXpre:
2739 return 3;
2740 case AArch64::LDPDpost:
2741 case AArch64::LDPDpre:
2742 case AArch64::LDPQpost:
2743 case AArch64::LDPQpre:
2744 case AArch64::LDPSpost:
2745 case AArch64::LDPSpre:
2746 case AArch64::LDPWpost:
2747 case AArch64::LDPWpre:
2748 case AArch64::LDPXpost:
2749 case AArch64::LDPXpre:
2750 case AArch64::STGPpre:
2751 case AArch64::STGPpost:
2752 case AArch64::STPDpost:
2753 case AArch64::STPDpre:
2754 case AArch64::STPQpost:
2755 case AArch64::STPQpre:
2756 case AArch64::STPSpost:
2757 case AArch64::STPSpre:
2758 case AArch64::STPWpost:
2759 case AArch64::STPWpre:
2760 case AArch64::STPXpost:
2761 case AArch64::STPXpre:
2762 return 4;
2763 }
2764}
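// For example, for LDRXui (ldr x0, [x1, #imm]) the offset is operand 2, while
// for a pre/post-indexed form such as LDRXpre the extra tied writeback operand
// shifts the offset to operand 3, and for paired writeback forms like LDPXpre
// it is operand 4.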
2765
2766bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
2767 switch (MI.getOpcode()) {
2768 default:
2769 return false;
2770 // Scaled instructions.
2771 case AArch64::STRSui:
2772 case AArch64::STRDui:
2773 case AArch64::STRQui:
2774 case AArch64::STRXui:
2775 case AArch64::STRWui:
2776 case AArch64::LDRSui:
2777 case AArch64::LDRDui:
2778 case AArch64::LDRQui:
2779 case AArch64::LDRXui:
2780 case AArch64::LDRWui:
2781 case AArch64::LDRSWui:
2782 // Unscaled instructions.
2783 case AArch64::STURSi:
2784 case AArch64::STRSpre:
2785 case AArch64::STURDi:
2786 case AArch64::STRDpre:
2787 case AArch64::STURQi:
2788 case AArch64::STRQpre:
2789 case AArch64::STURWi:
2790 case AArch64::STRWpre:
2791 case AArch64::STURXi:
2792 case AArch64::STRXpre:
2793 case AArch64::LDURSi:
2794 case AArch64::LDRSpre:
2795 case AArch64::LDURDi:
2796 case AArch64::LDRDpre:
2797 case AArch64::LDURQi:
2798 case AArch64::LDRQpre:
2799 case AArch64::LDURWi:
2800 case AArch64::LDRWpre:
2801 case AArch64::LDURXi:
2802 case AArch64::LDRXpre:
2803 case AArch64::LDURSWi:
2804 case AArch64::LDRSWpre:
2805 // SVE instructions.
2806 case AArch64::LDR_ZXI:
2807 case AArch64::STR_ZXI:
2808 return true;
2809 }
2810}
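// For example, LDRXui/LDURXi loads can be combined into an LDP by the
// load/store optimizer, while the SVE fill/spill forms LDR_ZXI/STR_ZXI listed
// above are only actually paired when the target is little-endian with a known
// 128-bit SVE vector length (see isCandidateToMergeOrPair below).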
2811
2812bool AArch64InstrInfo::isTailCallReturnInst(const MachineInstr &MI) {
2813 switch (MI.getOpcode()) {
2814 default:
2815 assert((!MI.isCall() || !MI.isReturn()) &&
2816 "Unexpected instruction - was a new tail call opcode introduced?");
2817 return false;
2818 case AArch64::TCRETURNdi:
2819 case AArch64::TCRETURNri:
2820 case AArch64::TCRETURNrix16x17:
2821 case AArch64::TCRETURNrix17:
2822 case AArch64::TCRETURNrinotx16:
2823 case AArch64::TCRETURNriALL:
2824 case AArch64::AUTH_TCRETURN:
2825 case AArch64::AUTH_TCRETURN_BTI:
2826 return true;
2827 }
2828}
2829
2830unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc) {
2831 switch (Opc) {
2832 default:
2833 llvm_unreachable("Opcode has no flag setting equivalent!");
2834 // 32-bit cases:
2835 case AArch64::ADDWri:
2836 return AArch64::ADDSWri;
2837 case AArch64::ADDWrr:
2838 return AArch64::ADDSWrr;
2839 case AArch64::ADDWrs:
2840 return AArch64::ADDSWrs;
2841 case AArch64::ADDWrx:
2842 return AArch64::ADDSWrx;
2843 case AArch64::ANDWri:
2844 return AArch64::ANDSWri;
2845 case AArch64::ANDWrr:
2846 return AArch64::ANDSWrr;
2847 case AArch64::ANDWrs:
2848 return AArch64::ANDSWrs;
2849 case AArch64::BICWrr:
2850 return AArch64::BICSWrr;
2851 case AArch64::BICWrs:
2852 return AArch64::BICSWrs;
2853 case AArch64::SUBWri:
2854 return AArch64::SUBSWri;
2855 case AArch64::SUBWrr:
2856 return AArch64::SUBSWrr;
2857 case AArch64::SUBWrs:
2858 return AArch64::SUBSWrs;
2859 case AArch64::SUBWrx:
2860 return AArch64::SUBSWrx;
2861 // 64-bit cases:
2862 case AArch64::ADDXri:
2863 return AArch64::ADDSXri;
2864 case AArch64::ADDXrr:
2865 return AArch64::ADDSXrr;
2866 case AArch64::ADDXrs:
2867 return AArch64::ADDSXrs;
2868 case AArch64::ADDXrx:
2869 return AArch64::ADDSXrx;
2870 case AArch64::ANDXri:
2871 return AArch64::ANDSXri;
2872 case AArch64::ANDXrr:
2873 return AArch64::ANDSXrr;
2874 case AArch64::ANDXrs:
2875 return AArch64::ANDSXrs;
2876 case AArch64::BICXrr:
2877 return AArch64::BICSXrr;
2878 case AArch64::BICXrs:
2879 return AArch64::BICSXrs;
2880 case AArch64::SUBXri:
2881 return AArch64::SUBSXri;
2882 case AArch64::SUBXrr:
2883 return AArch64::SUBSXrr;
2884 case AArch64::SUBXrs:
2885 return AArch64::SUBSXrs;
2886 case AArch64::SUBXrx:
2887 return AArch64::SUBSXrx;
2888 // SVE instructions:
2889 case AArch64::AND_PPzPP:
2890 return AArch64::ANDS_PPzPP;
2891 case AArch64::BIC_PPzPP:
2892 return AArch64::BICS_PPzPP;
2893 case AArch64::EOR_PPzPP:
2894 return AArch64::EORS_PPzPP;
2895 case AArch64::NAND_PPzPP:
2896 return AArch64::NANDS_PPzPP;
2897 case AArch64::NOR_PPzPP:
2898 return AArch64::NORS_PPzPP;
2899 case AArch64::ORN_PPzPP:
2900 return AArch64::ORNS_PPzPP;
2901 case AArch64::ORR_PPzPP:
2902 return AArch64::ORRS_PPzPP;
2903 case AArch64::BRKA_PPzP:
2904 return AArch64::BRKAS_PPzP;
2905 case AArch64::BRKPA_PPzPP:
2906 return AArch64::BRKPAS_PPzPP;
2907 case AArch64::BRKB_PPzP:
2908 return AArch64::BRKBS_PPzP;
2909 case AArch64::BRKPB_PPzPP:
2910 return AArch64::BRKPBS_PPzPP;
2911 case AArch64::BRKN_PPzP:
2912 return AArch64::BRKNS_PPzP;
2913 case AArch64::RDFFR_PPz:
2914 return AArch64::RDFFRS_PPz;
2915 case AArch64::PTRUE_B:
2916 return AArch64::PTRUES_B;
2917 }
2918}
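// For example, convertToFlagSettingOpc(AArch64::ANDWrr) is AArch64::ANDSWrr,
// and for SVE predicates AND_PPzPP becomes ANDS_PPzPP so the logical operation
// also sets NZCV and a following PTEST may become redundant.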
2919
2920// Is this a candidate for ld/st merging or pairing? For example, we don't
2921// touch volatiles or load/stores that have a hint to avoid pair formation.
2922bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
2923
2924 bool IsPreLdSt = isPreLdSt(MI);
2925
2926 // If this is a volatile load/store, don't mess with it.
2927 if (MI.hasOrderedMemoryRef())
2928 return false;
2929
2930 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
2931 // For Pre-inc LD/ST, the operand is shifted by one.
2932 assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
2933 MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
2934 "Expected a reg or frame index operand.");
2935
2936 // For Pre-indexed addressing quadword instructions, the third operand is the
2937 // immediate value.
2938 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
2939
2940 if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
2941 return false;
2942
2943 // Can't merge/pair if the instruction modifies the base register.
2944 // e.g., ldr x0, [x0]
2945 // This case will never occur with an FI base.
2946 // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
2947 // STR<S,D,Q,W,X>pre, it can be merged.
2948 // For example:
2949 // ldr q0, [x11, #32]!
2950 // ldr q1, [x11, #16]
2951 // to
2952 // ldp q0, q1, [x11, #32]!
2953 if (MI.getOperand(1).isReg() && !IsPreLdSt) {
2954 Register BaseReg = MI.getOperand(1).getReg();
2955 const TargetRegisterInfo *TRI = &getRegisterInfo();
2956 if (MI.modifiesRegister(BaseReg, TRI))
2957 return false;
2958 }
2959
2960 // Pairing SVE fills/spills is only valid for little-endian targets that
2961 // implement VLS 128.
2962 switch (MI.getOpcode()) {
2963 default:
2964 break;
2965 case AArch64::LDR_ZXI:
2966 case AArch64::STR_ZXI:
2967 if (!Subtarget.isLittleEndian() ||
2968 Subtarget.getSVEVectorSizeInBits() != 128)
2969 return false;
2970 }
2971
2972 // Check if this load/store has a hint to avoid pair formation.
2973 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
2974 if (isLdStPairSuppressed(MI))
2975 return false;
2976
2977 // Do not pair any callee-save store/reload instructions in the
2978 // prologue/epilogue if the CFI information encoded the operations as separate
2979 // instructions, as that will cause the size of the actual prologue to mismatch
2980 // with the prologue size recorded in the Windows CFI.
2981 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
2982 bool NeedsWinCFI = MAI->usesWindowsCFI() &&
2983 MI.getMF()->getFunction().needsUnwindTableEntry();
2984 if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
2985 MI.getFlag(MachineInstr::FrameDestroy)))
2986 return false;
2987
2988 // On some CPUs quad load/store pairs are slower than two single load/stores.
2989 if (Subtarget.isPaired128Slow()) {
2990 switch (MI.getOpcode()) {
2991 default:
2992 break;
2993 case AArch64::LDURQi:
2994 case AArch64::STURQi:
2995 case AArch64::LDRQui:
2996 case AArch64::STRQui:
2997 return false;
2998 }
2999 }
3000
3001 return true;
3002}
3003
3004bool AArch64InstrInfo::getMemOperandsWithOffsetWidth(
3005 const MachineInstr &LdSt, SmallVectorImpl<const MachineOperand *> &BaseOps,
3006 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
3007 const TargetRegisterInfo *TRI) const {
3008 if (!LdSt.mayLoadOrStore())
3009 return false;
3010
3011 const MachineOperand *BaseOp;
3012 TypeSize WidthN(0, false);
3013 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
3014 WidthN, TRI))
3015 return false;
3016 // The maximum vscale is 16 under AArch64; return the maximal extent for the
3017 // vector.
3018 Width = LocationSize::precise(WidthN);
3019 BaseOps.push_back(BaseOp);
3020 return true;
3021}
3022
3023std::optional<ExtAddrMode>
3024AArch64InstrInfo::getAddrModeFromMemoryOp(const MachineInstr &MemI,
3025 const TargetRegisterInfo *TRI) const {
3026 const MachineOperand *Base; // Filled with the base operand of MI.
3027 int64_t Offset; // Filled with the offset of MI.
3028 bool OffsetIsScalable;
3029 if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
3030 return std::nullopt;
3031
3032 if (!Base->isReg())
3033 return std::nullopt;
3034 ExtAddrMode AM;
3035 AM.BaseReg = Base->getReg();
3036 AM.Displacement = Offset;
3037 AM.ScaledReg = 0;
3038 AM.Scale = 0;
3039 return AM;
3040}
3041
3042bool AArch64InstrInfo::canFoldIntoAddrMode(const MachineInstr &MemI,
3043 Register Reg,
3044 const MachineInstr &AddrI,
3045 ExtAddrMode &AM) const {
3046 // Filter out instructions into which we cannot fold.
3047 unsigned NumBytes;
3048 int64_t OffsetScale = 1;
3049 switch (MemI.getOpcode()) {
3050 default:
3051 return false;
3052
3053 case AArch64::LDURQi:
3054 case AArch64::STURQi:
3055 NumBytes = 16;
3056 break;
3057
3058 case AArch64::LDURDi:
3059 case AArch64::STURDi:
3060 case AArch64::LDURXi:
3061 case AArch64::STURXi:
3062 NumBytes = 8;
3063 break;
3064
3065 case AArch64::LDURWi:
3066 case AArch64::LDURSWi:
3067 case AArch64::STURWi:
3068 NumBytes = 4;
3069 break;
3070
3071 case AArch64::LDURHi:
3072 case AArch64::STURHi:
3073 case AArch64::LDURHHi:
3074 case AArch64::STURHHi:
3075 case AArch64::LDURSHXi:
3076 case AArch64::LDURSHWi:
3077 NumBytes = 2;
3078 break;
3079
3080 case AArch64::LDRBroX:
3081 case AArch64::LDRBBroX:
3082 case AArch64::LDRSBXroX:
3083 case AArch64::LDRSBWroX:
3084 case AArch64::STRBroX:
3085 case AArch64::STRBBroX:
3086 case AArch64::LDURBi:
3087 case AArch64::LDURBBi:
3088 case AArch64::LDURSBXi:
3089 case AArch64::LDURSBWi:
3090 case AArch64::STURBi:
3091 case AArch64::STURBBi:
3092 case AArch64::LDRBui:
3093 case AArch64::LDRBBui:
3094 case AArch64::LDRSBXui:
3095 case AArch64::LDRSBWui:
3096 case AArch64::STRBui:
3097 case AArch64::STRBBui:
3098 NumBytes = 1;
3099 break;
3100
3101 case AArch64::LDRQroX:
3102 case AArch64::STRQroX:
3103 case AArch64::LDRQui:
3104 case AArch64::STRQui:
3105 NumBytes = 16;
3106 OffsetScale = 16;
3107 break;
3108
3109 case AArch64::LDRDroX:
3110 case AArch64::STRDroX:
3111 case AArch64::LDRXroX:
3112 case AArch64::STRXroX:
3113 case AArch64::LDRDui:
3114 case AArch64::STRDui:
3115 case AArch64::LDRXui:
3116 case AArch64::STRXui:
3117 NumBytes = 8;
3118 OffsetScale = 8;
3119 break;
3120
3121 case AArch64::LDRWroX:
3122 case AArch64::LDRSWroX:
3123 case AArch64::STRWroX:
3124 case AArch64::LDRWui:
3125 case AArch64::LDRSWui:
3126 case AArch64::STRWui:
3127 NumBytes = 4;
3128 OffsetScale = 4;
3129 break;
3130
3131 case AArch64::LDRHroX:
3132 case AArch64::STRHroX:
3133 case AArch64::LDRHHroX:
3134 case AArch64::STRHHroX:
3135 case AArch64::LDRSHXroX:
3136 case AArch64::LDRSHWroX:
3137 case AArch64::LDRHui:
3138 case AArch64::STRHui:
3139 case AArch64::LDRHHui:
3140 case AArch64::STRHHui:
3141 case AArch64::LDRSHXui:
3142 case AArch64::LDRSHWui:
3143 NumBytes = 2;
3144 OffsetScale = 2;
3145 break;
3146 }
3147
3148 // Check the fold operand is not the loaded/stored value.
3149 const MachineOperand &BaseRegOp = MemI.getOperand(0);
3150 if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
3151 return false;
3152
3153 // Handle memory instructions with a [Reg, Reg] addressing mode.
3154 if (MemI.getOperand(2).isReg()) {
3155 // Bail if the addressing mode already includes extension of the offset
3156 // register.
3157 if (MemI.getOperand(3).getImm())
3158 return false;
3159
3160 // Check if we actually have a scaled offset.
3161 if (MemI.getOperand(4).getImm() == 0)
3162 OffsetScale = 1;
3163
3164 // If the address instruction is folded into the base register, then the
3165 // addressing mode must not have a scale. Then we can swap the base and the
3166 // scaled registers.
3167 if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
3168 return false;
3169
3170 switch (AddrI.getOpcode()) {
3171 default:
3172 return false;
3173
3174 case AArch64::SBFMXri:
3175 // sxtw Xa, Wm
3176 // ldr Xd, [Xn, Xa, lsl #N]
3177 // ->
3178 // ldr Xd, [Xn, Wm, sxtw #N]
3179 if (AddrI.getOperand(2).getImm() != 0 ||
3180 AddrI.getOperand(3).getImm() != 31)
3181 return false;
3182
3183 AM.BaseReg = MemI.getOperand(1).getReg();
3184 if (AM.BaseReg == Reg)
3185 AM.BaseReg = MemI.getOperand(2).getReg();
3186 AM.ScaledReg = AddrI.getOperand(1).getReg();
3187 AM.Scale = OffsetScale;
3188 AM.Displacement = 0;
3189 AM.Form = ExtAddrMode::Formula::SExtScaledReg;
3190 return true;
3191
3192 case TargetOpcode::SUBREG_TO_REG: {
3193 // mov Wa, Wm
3194 // ldr Xd, [Xn, Xa, lsl #N]
3195 // ->
3196 // ldr Xd, [Xn, Wm, uxtw #N]
3197
3198 // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
3199 if (AddrI.getOperand(1).getImm() != 0 ||
3200 AddrI.getOperand(3).getImm() != AArch64::sub_32)
3201 return false;
3202
3203 const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
3204 Register OffsetReg = AddrI.getOperand(2).getReg();
3205 if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
3206 return false;
3207
3208 const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
3209 if (DefMI.getOpcode() != AArch64::ORRWrs ||
3210 DefMI.getOperand(1).getReg() != AArch64::WZR ||
3211 DefMI.getOperand(3).getImm() != 0)
3212 return false;
3213
3214 AM.BaseReg = MemI.getOperand(1).getReg();
3215 if (AM.BaseReg == Reg)
3216 AM.BaseReg = MemI.getOperand(2).getReg();
3217 AM.ScaledReg = DefMI.getOperand(2).getReg();
3218 AM.Scale = OffsetScale;
3219 AM.Displacement = 0;
3220 AM.Form = ExtAddrMode::Formula::ZExtScaledReg;
3221 return true;
3222 }
3223 }
3224 }
3225
3226 // Handle memory instructions with a [Reg, #Imm] addressing mode.
3227
3228 // Check we are not breaking a potential conversion to an LDP.
3229 auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
3230 int64_t NewOffset) -> bool {
3231 int64_t MinOffset, MaxOffset;
3232 switch (NumBytes) {
3233 default:
3234 return true;
3235 case 4:
3236 MinOffset = -256;
3237 MaxOffset = 252;
3238 break;
3239 case 8:
3240 MinOffset = -512;
3241 MaxOffset = 504;
3242 break;
3243 case 16:
3244 MinOffset = -1024;
3245 MaxOffset = 1008;
3246 break;
3247 }
3248 return OldOffset < MinOffset || OldOffset > MaxOffset ||
3249 (NewOffset >= MinOffset && NewOffset <= MaxOffset);
3250 };
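// For example, with an 8-byte access an old offset of 256 is LDP-encodable
// (range [-512, 504]); canFoldAddSubImmIntoAddrMode below therefore refuses a
// fold that would push the offset to, say, 520 and lose the pairing
// opportunity.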
3251 auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
3252 int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
3253 int64_t NewOffset = OldOffset + Disp;
3254 if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
3255 return false;
3256 // If the old offset would fit into an LDP, but the new offset wouldn't,
3257 // bail out.
3258 if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
3259 return false;
3260 AM.BaseReg = AddrI.getOperand(1).getReg();
3261 AM.ScaledReg = 0;
3262 AM.Scale = 0;
3263 AM.Displacement = NewOffset;
3264 AM.Form = ExtAddrMode::Formula::Basic;
3265 return true;
3266 };
3267
3268 auto canFoldAddRegIntoAddrMode =
3269 [&](int64_t Scale,
3270 ExtAddrMode::Formula Form = ExtAddrMode::Formula::Basic) -> bool {
3271 if (MemI.getOperand(2).getImm() != 0)
3272 return false;
3273 if ((unsigned)Scale != Scale)
3274 return false;
3275 if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
3276 return false;
3277 AM.BaseReg = AddrI.getOperand(1).getReg();
3278 AM.ScaledReg = AddrI.getOperand(2).getReg();
3279 AM.Scale = Scale;
3280 AM.Displacement = 0;
3281 AM.Form = Form;
3282 return true;
3283 };
3284
3285 auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
3286 unsigned Opcode = MemI.getOpcode();
3287 return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
3288 Subtarget.isSTRQroSlow();
3289 };
3290
3291 int64_t Disp = 0;
3292 const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
3293 switch (AddrI.getOpcode()) {
3294 default:
3295 return false;
3296
3297 case AArch64::ADDXri:
3298 // add Xa, Xn, #N
3299 // ldr Xd, [Xa, #M]
3300 // ->
3301 // ldr Xd, [Xn, #N'+M]
3302 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3303 return canFoldAddSubImmIntoAddrMode(Disp);
3304
3305 case AArch64::SUBXri:
3306 // sub Xa, Xn, #N
3307 // ldr Xd, [Xa, #M]
3308 // ->
3309 // ldr Xd, [Xn, #N'+M]
3310 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3311 return canFoldAddSubImmIntoAddrMode(-Disp);
3312
3313 case AArch64::ADDXrs: {
3314 // add Xa, Xn, Xm, lsl #N
3315 // ldr Xd, [Xa]
3316 // ->
3317 // ldr Xd, [Xn, Xm, lsl #N]
3318
3319 // Don't fold the add if the result would be slower, unless optimising for
3320 // size.
3321 unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3322 if (AArch64_AM::getShiftType(Shift) != AArch64_AM::LSL)
3323 return false;
3324 Shift = AArch64_AM::getShiftValue(Shift);
3325 if (!OptSize) {
3326 if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
3327 return false;
3328 if (avoidSlowSTRQ(MemI))
3329 return false;
3330 }
3331 return canFoldAddRegIntoAddrMode(1ULL << Shift);
3332 }
3333
3334 case AArch64::ADDXrr:
3335 // add Xa, Xn, Xm
3336 // ldr Xd, [Xa]
3337 // ->
3338 // ldr Xd, [Xn, Xm, lsl #0]
3339
3340 // Don't fold the add if the result would be slower, unless optimising for
3341 // size.
3342 if (!OptSize && avoidSlowSTRQ(MemI))
3343 return false;
3344 return canFoldAddRegIntoAddrMode(1);
3345
3346 case AArch64::ADDXrx:
3347 // add Xa, Xn, Wm, {s,u}xtw #N
3348 // ldr Xd, [Xa]
3349 // ->
3350 // ldr Xd, [Xn, Wm, {s,u}xtw #N]
3351
3352 // Don't fold the add if the result would be slower, unless optimising for
3353 // size.
3354 if (!OptSize && avoidSlowSTRQ(MemI))
3355 return false;
3356
3357 // Can fold only sign-/zero-extend of a word.
3358 unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3359 AArch64_AM::ShiftExtendType Extend = AArch64_AM::getArithExtendType(Imm);
3360 if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
3361 return false;
3362
3363 return canFoldAddRegIntoAddrMode(
3364 1ULL << AArch64_AM::getArithShiftValue(Imm),
3365 Extend == AArch64_AM::SXTW ? ExtAddrMode::Formula::SExtScaledReg
3366 : ExtAddrMode::Formula::ZExtScaledReg);
3367 }
3368}
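// For example, with AddrI = 'add x8, x0, #16' and MemI = 'ldr x9, [x8, #8]',
// canFoldIntoAddrMode fills AM with BaseReg = x0 and Displacement = 24, and
// emitLdStWithAddr below can then materialize 'ldur x9, [x0, #24]' (the
// unscaled form is chosen because the displacement fits in a signed 9 bits).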
3369
3370// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3371// return the opcode of an instruction performing the same operation, but using
3372// the [Reg, Reg] addressing mode.
3373static unsigned regOffsetOpcode(unsigned Opcode) {
3374 switch (Opcode) {
3375 default:
3376 llvm_unreachable("Address folding not implemented for instruction");
3377
3378 case AArch64::LDURQi:
3379 case AArch64::LDRQui:
3380 return AArch64::LDRQroX;
3381 case AArch64::STURQi:
3382 case AArch64::STRQui:
3383 return AArch64::STRQroX;
3384 case AArch64::LDURDi:
3385 case AArch64::LDRDui:
3386 return AArch64::LDRDroX;
3387 case AArch64::STURDi:
3388 case AArch64::STRDui:
3389 return AArch64::STRDroX;
3390 case AArch64::LDURXi:
3391 case AArch64::LDRXui:
3392 return AArch64::LDRXroX;
3393 case AArch64::STURXi:
3394 case AArch64::STRXui:
3395 return AArch64::STRXroX;
3396 case AArch64::LDURWi:
3397 case AArch64::LDRWui:
3398 return AArch64::LDRWroX;
3399 case AArch64::LDURSWi:
3400 case AArch64::LDRSWui:
3401 return AArch64::LDRSWroX;
3402 case AArch64::STURWi:
3403 case AArch64::STRWui:
3404 return AArch64::STRWroX;
3405 case AArch64::LDURHi:
3406 case AArch64::LDRHui:
3407 return AArch64::LDRHroX;
3408 case AArch64::STURHi:
3409 case AArch64::STRHui:
3410 return AArch64::STRHroX;
3411 case AArch64::LDURHHi:
3412 case AArch64::LDRHHui:
3413 return AArch64::LDRHHroX;
3414 case AArch64::STURHHi:
3415 case AArch64::STRHHui:
3416 return AArch64::STRHHroX;
3417 case AArch64::LDURSHXi:
3418 case AArch64::LDRSHXui:
3419 return AArch64::LDRSHXroX;
3420 case AArch64::LDURSHWi:
3421 case AArch64::LDRSHWui:
3422 return AArch64::LDRSHWroX;
3423 case AArch64::LDURBi:
3424 case AArch64::LDRBui:
3425 return AArch64::LDRBroX;
3426 case AArch64::LDURBBi:
3427 case AArch64::LDRBBui:
3428 return AArch64::LDRBBroX;
3429 case AArch64::LDURSBXi:
3430 case AArch64::LDRSBXui:
3431 return AArch64::LDRSBXroX;
3432 case AArch64::LDURSBWi:
3433 case AArch64::LDRSBWui:
3434 return AArch64::LDRSBWroX;
3435 case AArch64::STURBi:
3436 case AArch64::STRBui:
3437 return AArch64::STRBroX;
3438 case AArch64::STURBBi:
3439 case AArch64::STRBBui:
3440 return AArch64::STRBBroX;
3441 }
3442}
3443
3444// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3445// the opcode of an instruction performing the same operation, but using the
3446// [Reg, #Imm] addressing mode with scaled offset.
3447unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
3448 switch (Opcode) {
3449 default:
3450 llvm_unreachable("Address folding not implemented for instruction");
3451
3452 case AArch64::LDURQi:
3453 Scale = 16;
3454 return AArch64::LDRQui;
3455 case AArch64::STURQi:
3456 Scale = 16;
3457 return AArch64::STRQui;
3458 case AArch64::LDURDi:
3459 Scale = 8;
3460 return AArch64::LDRDui;
3461 case AArch64::STURDi:
3462 Scale = 8;
3463 return AArch64::STRDui;
3464 case AArch64::LDURXi:
3465 Scale = 8;
3466 return AArch64::LDRXui;
3467 case AArch64::STURXi:
3468 Scale = 8;
3469 return AArch64::STRXui;
3470 case AArch64::LDURWi:
3471 Scale = 4;
3472 return AArch64::LDRWui;
3473 case AArch64::LDURSWi:
3474 Scale = 4;
3475 return AArch64::LDRSWui;
3476 case AArch64::STURWi:
3477 Scale = 4;
3478 return AArch64::STRWui;
3479 case AArch64::LDURHi:
3480 Scale = 2;
3481 return AArch64::LDRHui;
3482 case AArch64::STURHi:
3483 Scale = 2;
3484 return AArch64::STRHui;
3485 case AArch64::LDURHHi:
3486 Scale = 2;
3487 return AArch64::LDRHHui;
3488 case AArch64::STURHHi:
3489 Scale = 2;
3490 return AArch64::STRHHui;
3491 case AArch64::LDURSHXi:
3492 Scale = 2;
3493 return AArch64::LDRSHXui;
3494 case AArch64::LDURSHWi:
3495 Scale = 2;
3496 return AArch64::LDRSHWui;
3497 case AArch64::LDURBi:
3498 Scale = 1;
3499 return AArch64::LDRBui;
3500 case AArch64::LDURBBi:
3501 Scale = 1;
3502 return AArch64::LDRBBui;
3503 case AArch64::LDURSBXi:
3504 Scale = 1;
3505 return AArch64::LDRSBXui;
3506 case AArch64::LDURSBWi:
3507 Scale = 1;
3508 return AArch64::LDRSBWui;
3509 case AArch64::STURBi:
3510 Scale = 1;
3511 return AArch64::STRBui;
3512 case AArch64::STURBBi:
3513 Scale = 1;
3514 return AArch64::STRBBui;
3515 case AArch64::LDRQui:
3516 case AArch64::STRQui:
3517 Scale = 16;
3518 return Opcode;
3519 case AArch64::LDRDui:
3520 case AArch64::STRDui:
3521 case AArch64::LDRXui:
3522 case AArch64::STRXui:
3523 Scale = 8;
3524 return Opcode;
3525 case AArch64::LDRWui:
3526 case AArch64::LDRSWui:
3527 case AArch64::STRWui:
3528 Scale = 4;
3529 return Opcode;
3530 case AArch64::LDRHui:
3531 case AArch64::STRHui:
3532 case AArch64::LDRHHui:
3533 case AArch64::STRHHui:
3534 case AArch64::LDRSHXui:
3535 case AArch64::LDRSHWui:
3536 Scale = 2;
3537 return Opcode;
3538 case AArch64::LDRBui:
3539 case AArch64::LDRBBui:
3540 case AArch64::LDRSBXui:
3541 case AArch64::LDRSBWui:
3542 case AArch64::STRBui:
3543 case AArch64::STRBBui:
3544 Scale = 1;
3545 return Opcode;
3546 }
3547}
3548
3549// Given an opcode for an instruction with a [Reg, #Imm] addressing mode, return
3550// the opcode of an instruction performing the same operation, but using the
3551// [Reg, #Imm] addressing mode with unscaled offset.
3552unsigned unscaledOffsetOpcode(unsigned Opcode) {
3553 switch (Opcode) {
3554 default:
3555 llvm_unreachable("Address folding not implemented for instruction");
3556
3557 case AArch64::LDURQi:
3558 case AArch64::STURQi:
3559 case AArch64::LDURDi:
3560 case AArch64::STURDi:
3561 case AArch64::LDURXi:
3562 case AArch64::STURXi:
3563 case AArch64::LDURWi:
3564 case AArch64::LDURSWi:
3565 case AArch64::STURWi:
3566 case AArch64::LDURHi:
3567 case AArch64::STURHi:
3568 case AArch64::LDURHHi:
3569 case AArch64::STURHHi:
3570 case AArch64::LDURSHXi:
3571 case AArch64::LDURSHWi:
3572 case AArch64::LDURBi:
3573 case AArch64::STURBi:
3574 case AArch64::LDURBBi:
3575 case AArch64::STURBBi:
3576 case AArch64::LDURSBWi:
3577 case AArch64::LDURSBXi:
3578 return Opcode;
3579 case AArch64::LDRQui:
3580 return AArch64::LDURQi;
3581 case AArch64::STRQui:
3582 return AArch64::STURQi;
3583 case AArch64::LDRDui:
3584 return AArch64::LDURDi;
3585 case AArch64::STRDui:
3586 return AArch64::STURDi;
3587 case AArch64::LDRXui:
3588 return AArch64::LDURXi;
3589 case AArch64::STRXui:
3590 return AArch64::STURXi;
3591 case AArch64::LDRWui:
3592 return AArch64::LDURWi;
3593 case AArch64::LDRSWui:
3594 return AArch64::LDURSWi;
3595 case AArch64::STRWui:
3596 return AArch64::STURWi;
3597 case AArch64::LDRHui:
3598 return AArch64::LDURHi;
3599 case AArch64::STRHui:
3600 return AArch64::STURHi;
3601 case AArch64::LDRHHui:
3602 return AArch64::LDURHHi;
3603 case AArch64::STRHHui:
3604 return AArch64::STURHHi;
3605 case AArch64::LDRSHXui:
3606 return AArch64::LDURSHXi;
3607 case AArch64::LDRSHWui:
3608 return AArch64::LDURSHWi;
3609 case AArch64::LDRBBui:
3610 return AArch64::LDURBBi;
3611 case AArch64::LDRBui:
3612 return AArch64::LDURBi;
3613 case AArch64::STRBBui:
3614 return AArch64::STURBBi;
3615 case AArch64::STRBui:
3616 return AArch64::STURBi;
3617 case AArch64::LDRSBWui:
3618 return AArch64::LDURSBWi;
3619 case AArch64::LDRSBXui:
3620 return AArch64::LDURSBXi;
3621 }
3622}
3623
3624// Given the opcode of a memory load/store instruction, return the opcode of an
3625// instruction performing the same operation, but using
3626// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
3627// offset register.
3628static unsigned offsetExtendOpcode(unsigned Opcode) {
3629 switch (Opcode) {
3630 default:
3631 llvm_unreachable("Address folding not implemented for instruction");
3632
3633 case AArch64::LDRQroX:
3634 case AArch64::LDURQi:
3635 case AArch64::LDRQui:
3636 return AArch64::LDRQroW;
3637 case AArch64::STRQroX:
3638 case AArch64::STURQi:
3639 case AArch64::STRQui:
3640 return AArch64::STRQroW;
3641 case AArch64::LDRDroX:
3642 case AArch64::LDURDi:
3643 case AArch64::LDRDui:
3644 return AArch64::LDRDroW;
3645 case AArch64::STRDroX:
3646 case AArch64::STURDi:
3647 case AArch64::STRDui:
3648 return AArch64::STRDroW;
3649 case AArch64::LDRXroX:
3650 case AArch64::LDURXi:
3651 case AArch64::LDRXui:
3652 return AArch64::LDRXroW;
3653 case AArch64::STRXroX:
3654 case AArch64::STURXi:
3655 case AArch64::STRXui:
3656 return AArch64::STRXroW;
3657 case AArch64::LDRWroX:
3658 case AArch64::LDURWi:
3659 case AArch64::LDRWui:
3660 return AArch64::LDRWroW;
3661 case AArch64::LDRSWroX:
3662 case AArch64::LDURSWi:
3663 case AArch64::LDRSWui:
3664 return AArch64::LDRSWroW;
3665 case AArch64::STRWroX:
3666 case AArch64::STURWi:
3667 case AArch64::STRWui:
3668 return AArch64::STRWroW;
3669 case AArch64::LDRHroX:
3670 case AArch64::LDURHi:
3671 case AArch64::LDRHui:
3672 return AArch64::LDRHroW;
3673 case AArch64::STRHroX:
3674 case AArch64::STURHi:
3675 case AArch64::STRHui:
3676 return AArch64::STRHroW;
3677 case AArch64::LDRHHroX:
3678 case AArch64::LDURHHi:
3679 case AArch64::LDRHHui:
3680 return AArch64::LDRHHroW;
3681 case AArch64::STRHHroX:
3682 case AArch64::STURHHi:
3683 case AArch64::STRHHui:
3684 return AArch64::STRHHroW;
3685 case AArch64::LDRSHXroX:
3686 case AArch64::LDURSHXi:
3687 case AArch64::LDRSHXui:
3688 return AArch64::LDRSHXroW;
3689 case AArch64::LDRSHWroX:
3690 case AArch64::LDURSHWi:
3691 case AArch64::LDRSHWui:
3692 return AArch64::LDRSHWroW;
3693 case AArch64::LDRBroX:
3694 case AArch64::LDURBi:
3695 case AArch64::LDRBui:
3696 return AArch64::LDRBroW;
3697 case AArch64::LDRBBroX:
3698 case AArch64::LDURBBi:
3699 case AArch64::LDRBBui:
3700 return AArch64::LDRBBroW;
3701 case AArch64::LDRSBXroX:
3702 case AArch64::LDURSBXi:
3703 case AArch64::LDRSBXui:
3704 return AArch64::LDRSBXroW;
3705 case AArch64::LDRSBWroX:
3706 case AArch64::LDURSBWi:
3707 case AArch64::LDRSBWui:
3708 return AArch64::LDRSBWroW;
3709 case AArch64::STRBroX:
3710 case AArch64::STURBi:
3711 case AArch64::STRBui:
3712 return AArch64::STRBroW;
3713 case AArch64::STRBBroX:
3714 case AArch64::STURBBi:
3715 case AArch64::STRBBui:
3716 return AArch64::STRBBroW;
3717 }
3718}
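// Illustrative note: for a 32-bit load, the mapping above turns either the
// scaled or unscaled immediate form into the W-register extended-offset form,
// e.g.
//   ldr w0, [x1, #8]            ; LDRWui
//   ldr w0, [x1, w2, sxtw #2]   ; LDRWroW, offset register sign-extended
// Whether sxtw or uxtw is used, and whether the shift is applied, is decided
// by the caller (emitLdStWithAddr below), not by this table.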
3719
3720MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
3721 const ExtAddrMode &AM) const {
3722
3723 const DebugLoc &DL = MemI.getDebugLoc();
3724 MachineBasicBlock &MBB = *MemI.getParent();
3725 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3726
3727 if (AM.Form == ExtAddrMode::Formula::Basic) {
3728 if (AM.ScaledReg) {
3729 // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
3730 unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
3731 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3732 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3733 .addReg(MemI.getOperand(0).getReg(),
3734 MemI.mayLoad() ? RegState::Define : 0)
3735 .addReg(AM.BaseReg)
3736 .addReg(AM.ScaledReg)
3737 .addImm(0)
3738 .addImm(AM.Scale > 1)
3739 .setMemRefs(MemI.memoperands())
3740 .setMIFlags(MemI.getFlags());
3741 return B.getInstr();
3742 }
3743
3744 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
3745 "Addressing mode not supported for folding");
3746
3747 // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
3748 unsigned Scale = 1;
3749 unsigned Opcode = MemI.getOpcode();
3750 if (isInt<9>(AM.Displacement))
3751 Opcode = unscaledOffsetOpcode(Opcode);
3752 else
3753 Opcode = scaledOffsetOpcode(Opcode, Scale);
3754
3755 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3756 .addReg(MemI.getOperand(0).getReg(),
3757 MemI.mayLoad() ? RegState::Define : 0)
3758 .addReg(AM.BaseReg)
3759 .addImm(AM.Displacement / Scale)
3760 .setMemRefs(MemI.memoperands())
3761 .setMIFlags(MemI.getFlags());
3762 return B.getInstr();
3763 }
3764
3765 if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
3766 AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
3767 // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
3768 assert(AM.ScaledReg && !AM.Displacement &&
3769 "Address offset can be a register or an immediate, but not both");
3770 unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
3771 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
3772 // Make sure the offset register is in the correct register class.
3773 Register OffsetReg = AM.ScaledReg;
3774 const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
3775 if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
3776 OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
3777 BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
3778 .addReg(AM.ScaledReg, 0, AArch64::sub_32);
3779 }
3780 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
3781 .addReg(MemI.getOperand(0).getReg(),
3782 MemI.mayLoad() ? RegState::Define : 0)
3783 .addReg(AM.BaseReg)
3784 .addReg(OffsetReg)
3785 .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
3786 .addImm(AM.Scale != 1)
3787 .setMemRefs(MemI.memoperands())
3788 .setMIFlags(MemI.getFlags());
3789
3790 return B.getInstr();
3791 }
3792
3794 "Function must not be called with an addressing mode it can't handle");
3795}
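// Illustrative summary of the folds performed above: given
//   add x8, x0, w1, sxtw #3  followed by  ldr x9, [x8]
// the combined access can be emitted as
//   ldr x9, [x0, w1, sxtw #3]   ; extended-register form
// while add x8, x0, #256 feeding the same load becomes
//   ldr x9, [x0, #256]          ; scaled unsigned immediate (LDRXui)
// and a small negative or misaligned displacement would use LDURXi instead.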
3796
3797/// Return true if the opcode is a post-index ld/st instruction, which really
3798/// accesses its memory at base+0.
3799static bool isPostIndexLdStOpcode(unsigned Opcode) {
3800 switch (Opcode) {
3801 default:
3802 return false;
3803 case AArch64::LD1Fourv16b_POST:
3804 case AArch64::LD1Fourv1d_POST:
3805 case AArch64::LD1Fourv2d_POST:
3806 case AArch64::LD1Fourv2s_POST:
3807 case AArch64::LD1Fourv4h_POST:
3808 case AArch64::LD1Fourv4s_POST:
3809 case AArch64::LD1Fourv8b_POST:
3810 case AArch64::LD1Fourv8h_POST:
3811 case AArch64::LD1Onev16b_POST:
3812 case AArch64::LD1Onev1d_POST:
3813 case AArch64::LD1Onev2d_POST:
3814 case AArch64::LD1Onev2s_POST:
3815 case AArch64::LD1Onev4h_POST:
3816 case AArch64::LD1Onev4s_POST:
3817 case AArch64::LD1Onev8b_POST:
3818 case AArch64::LD1Onev8h_POST:
3819 case AArch64::LD1Rv16b_POST:
3820 case AArch64::LD1Rv1d_POST:
3821 case AArch64::LD1Rv2d_POST:
3822 case AArch64::LD1Rv2s_POST:
3823 case AArch64::LD1Rv4h_POST:
3824 case AArch64::LD1Rv4s_POST:
3825 case AArch64::LD1Rv8b_POST:
3826 case AArch64::LD1Rv8h_POST:
3827 case AArch64::LD1Threev16b_POST:
3828 case AArch64::LD1Threev1d_POST:
3829 case AArch64::LD1Threev2d_POST:
3830 case AArch64::LD1Threev2s_POST:
3831 case AArch64::LD1Threev4h_POST:
3832 case AArch64::LD1Threev4s_POST:
3833 case AArch64::LD1Threev8b_POST:
3834 case AArch64::LD1Threev8h_POST:
3835 case AArch64::LD1Twov16b_POST:
3836 case AArch64::LD1Twov1d_POST:
3837 case AArch64::LD1Twov2d_POST:
3838 case AArch64::LD1Twov2s_POST:
3839 case AArch64::LD1Twov4h_POST:
3840 case AArch64::LD1Twov4s_POST:
3841 case AArch64::LD1Twov8b_POST:
3842 case AArch64::LD1Twov8h_POST:
3843 case AArch64::LD1i16_POST:
3844 case AArch64::LD1i32_POST:
3845 case AArch64::LD1i64_POST:
3846 case AArch64::LD1i8_POST:
3847 case AArch64::LD2Rv16b_POST:
3848 case AArch64::LD2Rv1d_POST:
3849 case AArch64::LD2Rv2d_POST:
3850 case AArch64::LD2Rv2s_POST:
3851 case AArch64::LD2Rv4h_POST:
3852 case AArch64::LD2Rv4s_POST:
3853 case AArch64::LD2Rv8b_POST:
3854 case AArch64::LD2Rv8h_POST:
3855 case AArch64::LD2Twov16b_POST:
3856 case AArch64::LD2Twov2d_POST:
3857 case AArch64::LD2Twov2s_POST:
3858 case AArch64::LD2Twov4h_POST:
3859 case AArch64::LD2Twov4s_POST:
3860 case AArch64::LD2Twov8b_POST:
3861 case AArch64::LD2Twov8h_POST:
3862 case AArch64::LD2i16_POST:
3863 case AArch64::LD2i32_POST:
3864 case AArch64::LD2i64_POST:
3865 case AArch64::LD2i8_POST:
3866 case AArch64::LD3Rv16b_POST:
3867 case AArch64::LD3Rv1d_POST:
3868 case AArch64::LD3Rv2d_POST:
3869 case AArch64::LD3Rv2s_POST:
3870 case AArch64::LD3Rv4h_POST:
3871 case AArch64::LD3Rv4s_POST:
3872 case AArch64::LD3Rv8b_POST:
3873 case AArch64::LD3Rv8h_POST:
3874 case AArch64::LD3Threev16b_POST:
3875 case AArch64::LD3Threev2d_POST:
3876 case AArch64::LD3Threev2s_POST:
3877 case AArch64::LD3Threev4h_POST:
3878 case AArch64::LD3Threev4s_POST:
3879 case AArch64::LD3Threev8b_POST:
3880 case AArch64::LD3Threev8h_POST:
3881 case AArch64::LD3i16_POST:
3882 case AArch64::LD3i32_POST:
3883 case AArch64::LD3i64_POST:
3884 case AArch64::LD3i8_POST:
3885 case AArch64::LD4Fourv16b_POST:
3886 case AArch64::LD4Fourv2d_POST:
3887 case AArch64::LD4Fourv2s_POST:
3888 case AArch64::LD4Fourv4h_POST:
3889 case AArch64::LD4Fourv4s_POST:
3890 case AArch64::LD4Fourv8b_POST:
3891 case AArch64::LD4Fourv8h_POST:
3892 case AArch64::LD4Rv16b_POST:
3893 case AArch64::LD4Rv1d_POST:
3894 case AArch64::LD4Rv2d_POST:
3895 case AArch64::LD4Rv2s_POST:
3896 case AArch64::LD4Rv4h_POST:
3897 case AArch64::LD4Rv4s_POST:
3898 case AArch64::LD4Rv8b_POST:
3899 case AArch64::LD4Rv8h_POST:
3900 case AArch64::LD4i16_POST:
3901 case AArch64::LD4i32_POST:
3902 case AArch64::LD4i64_POST:
3903 case AArch64::LD4i8_POST:
3904 case AArch64::LDAPRWpost:
3905 case AArch64::LDAPRXpost:
3906 case AArch64::LDIAPPWpost:
3907 case AArch64::LDIAPPXpost:
3908 case AArch64::LDPDpost:
3909 case AArch64::LDPQpost:
3910 case AArch64::LDPSWpost:
3911 case AArch64::LDPSpost:
3912 case AArch64::LDPWpost:
3913 case AArch64::LDPXpost:
3914 case AArch64::LDRBBpost:
3915 case AArch64::LDRBpost:
3916 case AArch64::LDRDpost:
3917 case AArch64::LDRHHpost:
3918 case AArch64::LDRHpost:
3919 case AArch64::LDRQpost:
3920 case AArch64::LDRSBWpost:
3921 case AArch64::LDRSBXpost:
3922 case AArch64::LDRSHWpost:
3923 case AArch64::LDRSHXpost:
3924 case AArch64::LDRSWpost:
3925 case AArch64::LDRSpost:
3926 case AArch64::LDRWpost:
3927 case AArch64::LDRXpost:
3928 case AArch64::ST1Fourv16b_POST:
3929 case AArch64::ST1Fourv1d_POST:
3930 case AArch64::ST1Fourv2d_POST:
3931 case AArch64::ST1Fourv2s_POST:
3932 case AArch64::ST1Fourv4h_POST:
3933 case AArch64::ST1Fourv4s_POST:
3934 case AArch64::ST1Fourv8b_POST:
3935 case AArch64::ST1Fourv8h_POST:
3936 case AArch64::ST1Onev16b_POST:
3937 case AArch64::ST1Onev1d_POST:
3938 case AArch64::ST1Onev2d_POST:
3939 case AArch64::ST1Onev2s_POST:
3940 case AArch64::ST1Onev4h_POST:
3941 case AArch64::ST1Onev4s_POST:
3942 case AArch64::ST1Onev8b_POST:
3943 case AArch64::ST1Onev8h_POST:
3944 case AArch64::ST1Threev16b_POST:
3945 case AArch64::ST1Threev1d_POST:
3946 case AArch64::ST1Threev2d_POST:
3947 case AArch64::ST1Threev2s_POST:
3948 case AArch64::ST1Threev4h_POST:
3949 case AArch64::ST1Threev4s_POST:
3950 case AArch64::ST1Threev8b_POST:
3951 case AArch64::ST1Threev8h_POST:
3952 case AArch64::ST1Twov16b_POST:
3953 case AArch64::ST1Twov1d_POST:
3954 case AArch64::ST1Twov2d_POST:
3955 case AArch64::ST1Twov2s_POST:
3956 case AArch64::ST1Twov4h_POST:
3957 case AArch64::ST1Twov4s_POST:
3958 case AArch64::ST1Twov8b_POST:
3959 case AArch64::ST1Twov8h_POST:
3960 case AArch64::ST1i16_POST:
3961 case AArch64::ST1i32_POST:
3962 case AArch64::ST1i64_POST:
3963 case AArch64::ST1i8_POST:
3964 case AArch64::ST2GPostIndex:
3965 case AArch64::ST2Twov16b_POST:
3966 case AArch64::ST2Twov2d_POST:
3967 case AArch64::ST2Twov2s_POST:
3968 case AArch64::ST2Twov4h_POST:
3969 case AArch64::ST2Twov4s_POST:
3970 case AArch64::ST2Twov8b_POST:
3971 case AArch64::ST2Twov8h_POST:
3972 case AArch64::ST2i16_POST:
3973 case AArch64::ST2i32_POST:
3974 case AArch64::ST2i64_POST:
3975 case AArch64::ST2i8_POST:
3976 case AArch64::ST3Threev16b_POST:
3977 case AArch64::ST3Threev2d_POST:
3978 case AArch64::ST3Threev2s_POST:
3979 case AArch64::ST3Threev4h_POST:
3980 case AArch64::ST3Threev4s_POST:
3981 case AArch64::ST3Threev8b_POST:
3982 case AArch64::ST3Threev8h_POST:
3983 case AArch64::ST3i16_POST:
3984 case AArch64::ST3i32_POST:
3985 case AArch64::ST3i64_POST:
3986 case AArch64::ST3i8_POST:
3987 case AArch64::ST4Fourv16b_POST:
3988 case AArch64::ST4Fourv2d_POST:
3989 case AArch64::ST4Fourv2s_POST:
3990 case AArch64::ST4Fourv4h_POST:
3991 case AArch64::ST4Fourv4s_POST:
3992 case AArch64::ST4Fourv8b_POST:
3993 case AArch64::ST4Fourv8h_POST:
3994 case AArch64::ST4i16_POST:
3995 case AArch64::ST4i32_POST:
3996 case AArch64::ST4i64_POST:
3997 case AArch64::ST4i8_POST:
3998 case AArch64::STGPostIndex:
3999 case AArch64::STGPpost:
4000 case AArch64::STPDpost:
4001 case AArch64::STPQpost:
4002 case AArch64::STPSpost:
4003 case AArch64::STPWpost:
4004 case AArch64::STPXpost:
4005 case AArch64::STRBBpost:
4006 case AArch64::STRBpost:
4007 case AArch64::STRDpost:
4008 case AArch64::STRHHpost:
4009 case AArch64::STRHpost:
4010 case AArch64::STRQpost:
4011 case AArch64::STRSpost:
4012 case AArch64::STRWpost:
4013 case AArch64::STRXpost:
4014 case AArch64::STZ2GPostIndex:
4015 case AArch64::STZGPostIndex:
4016 return true;
4017 }
4018}
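// Illustrative note: a post-index access such as
//   ldr x0, [x1], #16           ; LDRXpost
// reads memory at the unmodified base (x1 + 0) and only then adds #16 to x1,
// which is why callers of the helper above can treat its offset as 0.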
4019
4020bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
4021 const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
4022 bool &OffsetIsScalable, TypeSize &Width,
4023 const TargetRegisterInfo *TRI) const {
4024 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
4025 // Handle only loads/stores with base register followed by immediate offset.
4026 if (LdSt.getNumExplicitOperands() == 3) {
4027 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
4028 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
4029 !LdSt.getOperand(2).isImm())
4030 return false;
4031 } else if (LdSt.getNumExplicitOperands() == 4) {
4032 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
4033 if (!LdSt.getOperand(1).isReg() ||
4034 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
4035 !LdSt.getOperand(3).isImm())
4036 return false;
4037 } else
4038 return false;
4039
4040 // Get the scaling factor for the instruction and set the width for the
4041 // instruction.
4042 TypeSize Scale(0U, false);
4043 int64_t Dummy1, Dummy2;
4044
4045 // If this returns false, then it's an instruction we don't want to handle.
4046 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
4047 return false;
4048
4049 // Compute the offset. The offset is the immediate operand multiplied by the
4050 // scaling factor; unscaled instructions have a scaling factor of 1. Post-index
4051 // instructions are a special case and report an offset of 0.
4052 if (isPostIndexLdStOpcode(LdSt.getOpcode())) {
4053 BaseOp = &LdSt.getOperand(2);
4054 Offset = 0;
4055 } else if (LdSt.getNumExplicitOperands() == 3) {
4056 BaseOp = &LdSt.getOperand(1);
4057 Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
4058 } else {
4059 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
4060 BaseOp = &LdSt.getOperand(2);
4061 Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
4062 }
4063 OffsetIsScalable = Scale.isScalable();
4064
4065 return BaseOp->isReg() || BaseOp->isFI();
4066}
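// Illustrative note on the conventions above: for the MI `LDRXui x0, x1, 2`
// (assembly `ldr x0, [x1, #16]`) the operand immediate is the scaled index,
// so with Scale == 8 the reported Offset is 16 bytes; for
// `LDPXi x0, x1, x2, -8` (assembly `ldp x0, x1, [x2, #-64]`) the base is
// operand 2 and the reported Offset is -64 bytes.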
4067
4070 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
4071 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
4072 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
4073 return OfsOp;
4074}
4075
4076bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
4077 TypeSize &Width, int64_t &MinOffset,
4078 int64_t &MaxOffset) {
4079 switch (Opcode) {
4080 // Not a memory operation or something we want to handle.
4081 default:
4082 Scale = TypeSize::getFixed(0);
4083 Width = TypeSize::getFixed(0);
4084 MinOffset = MaxOffset = 0;
4085 return false;
4086 // LDR / STR
4087 case AArch64::LDRQui:
4088 case AArch64::STRQui:
4089 Scale = TypeSize::getFixed(16);
4090 Width = TypeSize::getFixed(16);
4091 MinOffset = 0;
4092 MaxOffset = 4095;
4093 break;
4094 case AArch64::LDRXui:
4095 case AArch64::LDRDui:
4096 case AArch64::STRXui:
4097 case AArch64::STRDui:
4098 case AArch64::PRFMui:
4099 Scale = TypeSize::getFixed(8);
4100 Width = TypeSize::getFixed(8);
4101 MinOffset = 0;
4102 MaxOffset = 4095;
4103 break;
4104 case AArch64::LDRWui:
4105 case AArch64::LDRSui:
4106 case AArch64::LDRSWui:
4107 case AArch64::STRWui:
4108 case AArch64::STRSui:
4109 Scale = TypeSize::getFixed(4);
4110 Width = TypeSize::getFixed(4);
4111 MinOffset = 0;
4112 MaxOffset = 4095;
4113 break;
4114 case AArch64::LDRHui:
4115 case AArch64::LDRHHui:
4116 case AArch64::LDRSHWui:
4117 case AArch64::LDRSHXui:
4118 case AArch64::STRHui:
4119 case AArch64::STRHHui:
4120 Scale = TypeSize::getFixed(2);
4121 Width = TypeSize::getFixed(2);
4122 MinOffset = 0;
4123 MaxOffset = 4095;
4124 break;
4125 case AArch64::LDRBui:
4126 case AArch64::LDRBBui:
4127 case AArch64::LDRSBWui:
4128 case AArch64::LDRSBXui:
4129 case AArch64::STRBui:
4130 case AArch64::STRBBui:
4131 Scale = TypeSize::getFixed(1);
4132 Width = TypeSize::getFixed(1);
4133 MinOffset = 0;
4134 MaxOffset = 4095;
4135 break;
4136 // post/pre inc
4137 case AArch64::STRQpre:
4138 case AArch64::LDRQpost:
4139 Scale = TypeSize::getFixed(1);
4140 Width = TypeSize::getFixed(16);
4141 MinOffset = -256;
4142 MaxOffset = 255;
4143 break;
4144 case AArch64::LDRDpost:
4145 case AArch64::LDRDpre:
4146 case AArch64::LDRXpost:
4147 case AArch64::LDRXpre:
4148 case AArch64::STRDpost:
4149 case AArch64::STRDpre:
4150 case AArch64::STRXpost:
4151 case AArch64::STRXpre:
4152 Scale = TypeSize::getFixed(1);
4153 Width = TypeSize::getFixed(8);
4154 MinOffset = -256;
4155 MaxOffset = 255;
4156 break;
4157 case AArch64::STRWpost:
4158 case AArch64::STRWpre:
4159 case AArch64::LDRWpost:
4160 case AArch64::LDRWpre:
4161 case AArch64::STRSpost:
4162 case AArch64::STRSpre:
4163 case AArch64::LDRSpost:
4164 case AArch64::LDRSpre:
4165 Scale = TypeSize::getFixed(1);
4166 Width = TypeSize::getFixed(4);
4167 MinOffset = -256;
4168 MaxOffset = 255;
4169 break;
4170 case AArch64::LDRHpost:
4171 case AArch64::LDRHpre:
4172 case AArch64::STRHpost:
4173 case AArch64::STRHpre:
4174 case AArch64::LDRHHpost:
4175 case AArch64::LDRHHpre:
4176 case AArch64::STRHHpost:
4177 case AArch64::STRHHpre:
4178 Scale = TypeSize::getFixed(1);
4179 Width = TypeSize::getFixed(2);
4180 MinOffset = -256;
4181 MaxOffset = 255;
4182 break;
4183 case AArch64::LDRBpost:
4184 case AArch64::LDRBpre:
4185 case AArch64::STRBpost:
4186 case AArch64::STRBpre:
4187 case AArch64::LDRBBpost:
4188 case AArch64::LDRBBpre:
4189 case AArch64::STRBBpost:
4190 case AArch64::STRBBpre:
4191 Scale = TypeSize::getFixed(1);
4192 Width = TypeSize::getFixed(1);
4193 MinOffset = -256;
4194 MaxOffset = 255;
4195 break;
4196 // Unscaled
4197 case AArch64::LDURQi:
4198 case AArch64::STURQi:
4199 Scale = TypeSize::getFixed(1);
4200 Width = TypeSize::getFixed(16);
4201 MinOffset = -256;
4202 MaxOffset = 255;
4203 break;
4204 case AArch64::LDURXi:
4205 case AArch64::LDURDi:
4206 case AArch64::LDAPURXi:
4207 case AArch64::STURXi:
4208 case AArch64::STURDi:
4209 case AArch64::STLURXi:
4210 case AArch64::PRFUMi:
4211 Scale = TypeSize::getFixed(1);
4212 Width = TypeSize::getFixed(8);
4213 MinOffset = -256;
4214 MaxOffset = 255;
4215 break;
4216 case AArch64::LDURWi:
4217 case AArch64::LDURSi:
4218 case AArch64::LDURSWi:
4219 case AArch64::LDAPURi:
4220 case AArch64::LDAPURSWi:
4221 case AArch64::STURWi:
4222 case AArch64::STURSi:
4223 case AArch64::STLURWi:
4224 Scale = TypeSize::getFixed(1);
4225 Width = TypeSize::getFixed(4);
4226 MinOffset = -256;
4227 MaxOffset = 255;
4228 break;
4229 case AArch64::LDURHi:
4230 case AArch64::LDURHHi:
4231 case AArch64::LDURSHXi:
4232 case AArch64::LDURSHWi:
4233 case AArch64::LDAPURHi:
4234 case AArch64::LDAPURSHWi:
4235 case AArch64::LDAPURSHXi:
4236 case AArch64::STURHi:
4237 case AArch64::STURHHi:
4238 case AArch64::STLURHi:
4239 Scale = TypeSize::getFixed(1);
4240 Width = TypeSize::getFixed(2);
4241 MinOffset = -256;
4242 MaxOffset = 255;
4243 break;
4244 case AArch64::LDURBi:
4245 case AArch64::LDURBBi:
4246 case AArch64::LDURSBXi:
4247 case AArch64::LDURSBWi:
4248 case AArch64::LDAPURBi:
4249 case AArch64::LDAPURSBWi:
4250 case AArch64::LDAPURSBXi:
4251 case AArch64::STURBi:
4252 case AArch64::STURBBi:
4253 case AArch64::STLURBi:
4254 Scale = TypeSize::getFixed(1);
4255 Width = TypeSize::getFixed(1);
4256 MinOffset = -256;
4257 MaxOffset = 255;
4258 break;
4259 // LDP / STP (including pre/post inc)
4260 case AArch64::LDPQi:
4261 case AArch64::LDNPQi:
4262 case AArch64::STPQi:
4263 case AArch64::STNPQi:
4264 case AArch64::LDPQpost:
4265 case AArch64::LDPQpre:
4266 case AArch64::STPQpost:
4267 case AArch64::STPQpre:
4268 Scale = TypeSize::getFixed(16);
4269 Width = TypeSize::getFixed(16 * 2);
4270 MinOffset = -64;
4271 MaxOffset = 63;
4272 break;
4273 case AArch64::LDPXi:
4274 case AArch64::LDPDi:
4275 case AArch64::LDNPXi:
4276 case AArch64::LDNPDi:
4277 case AArch64::STPXi:
4278 case AArch64::STPDi:
4279 case AArch64::STNPXi:
4280 case AArch64::STNPDi:
4281 case AArch64::LDPDpost:
4282 case AArch64::LDPDpre:
4283 case AArch64::LDPXpost:
4284 case AArch64::LDPXpre:
4285 case AArch64::STPDpost:
4286 case AArch64::STPDpre:
4287 case AArch64::STPXpost:
4288 case AArch64::STPXpre:
4289 Scale = TypeSize::getFixed(8);
4290 Width = TypeSize::getFixed(8 * 2);
4291 MinOffset = -64;
4292 MaxOffset = 63;
4293 break;
4294 case AArch64::LDPWi:
4295 case AArch64::LDPSi:
4296 case AArch64::LDNPWi:
4297 case AArch64::LDNPSi:
4298 case AArch64::STPWi:
4299 case AArch64::STPSi:
4300 case AArch64::STNPWi:
4301 case AArch64::STNPSi:
4302 case AArch64::LDPSpost:
4303 case AArch64::LDPSpre:
4304 case AArch64::LDPWpost:
4305 case AArch64::LDPWpre:
4306 case AArch64::STPSpost:
4307 case AArch64::STPSpre:
4308 case AArch64::STPWpost:
4309 case AArch64::STPWpre:
4310 Scale = TypeSize::getFixed(4);
4311 Width = TypeSize::getFixed(4 * 2);
4312 MinOffset = -64;
4313 MaxOffset = 63;
4314 break;
4315 case AArch64::StoreSwiftAsyncContext:
4316 // Store is an STRXui, but there might be an ADDXri in the expansion too.
4317 Scale = TypeSize::getFixed(1);
4318 Width = TypeSize::getFixed(8);
4319 MinOffset = 0;
4320 MaxOffset = 4095;
4321 break;
4322 case AArch64::ADDG:
4323 Scale = TypeSize::getFixed(16);
4324 Width = TypeSize::getFixed(0);
4325 MinOffset = 0;
4326 MaxOffset = 63;
4327 break;
4328 case AArch64::TAGPstack:
4329 Scale = TypeSize::getFixed(16);
4330 Width = TypeSize::getFixed(0);
4331 // TAGP with a negative offset turns into SUBP, which has a maximum offset
4332 // of 63 (not 64!).
4333 MinOffset = -63;
4334 MaxOffset = 63;
4335 break;
4336 case AArch64::LDG:
4337 case AArch64::STGi:
4338 case AArch64::STGPreIndex:
4339 case AArch64::STGPostIndex:
4340 case AArch64::STZGi:
4341 case AArch64::STZGPreIndex:
4342 case AArch64::STZGPostIndex:
4343 Scale = TypeSize::getFixed(16);
4344 Width = TypeSize::getFixed(16);
4345 MinOffset = -256;
4346 MaxOffset = 255;
4347 break;
4348 // SVE
4349 case AArch64::STR_ZZZZXI:
4350 case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
4351 case AArch64::LDR_ZZZZXI:
4352 case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
4353 Scale = TypeSize::getScalable(16);
4354 Width = TypeSize::getScalable(16 * 4);
4355 MinOffset = -256;
4356 MaxOffset = 252;
4357 break;
4358 case AArch64::STR_ZZZXI:
4359 case AArch64::LDR_ZZZXI:
4360 Scale = TypeSize::getScalable(16);
4361 Width = TypeSize::getScalable(16 * 3);
4362 MinOffset = -256;
4363 MaxOffset = 253;
4364 break;
4365 case AArch64::STR_ZZXI:
4366 case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
4367 case AArch64::LDR_ZZXI:
4368 case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
4369 Scale = TypeSize::getScalable(16);
4370 Width = TypeSize::getScalable(16 * 2);
4371 MinOffset = -256;
4372 MaxOffset = 254;
4373 break;
4374 case AArch64::LDR_PXI:
4375 case AArch64::STR_PXI:
4376 Scale = TypeSize::getScalable(2);
4377 Width = TypeSize::getScalable(2);
4378 MinOffset = -256;
4379 MaxOffset = 255;
4380 break;
4381 case AArch64::LDR_PPXI:
4382 case AArch64::STR_PPXI:
4383 Scale = TypeSize::getScalable(2);
4384 Width = TypeSize::getScalable(2 * 2);
4385 MinOffset = -256;
4386 MaxOffset = 254;
4387 break;
4388 case AArch64::LDR_ZXI:
4389 case AArch64::STR_ZXI:
4390 Scale = TypeSize::getScalable(16);
4391 Width = TypeSize::getScalable(16);
4392 MinOffset = -256;
4393 MaxOffset = 255;
4394 break;
4395 case AArch64::LD1B_IMM:
4396 case AArch64::LD1H_IMM:
4397 case AArch64::LD1W_IMM:
4398 case AArch64::LD1D_IMM:
4399 case AArch64::LDNT1B_ZRI:
4400 case AArch64::LDNT1H_ZRI:
4401 case AArch64::LDNT1W_ZRI:
4402 case AArch64::LDNT1D_ZRI:
4403 case AArch64::ST1B_IMM:
4404 case AArch64::ST1H_IMM:
4405 case AArch64::ST1W_IMM:
4406 case AArch64::ST1D_IMM:
4407 case AArch64::STNT1B_ZRI:
4408 case AArch64::STNT1H_ZRI:
4409 case AArch64::STNT1W_ZRI:
4410 case AArch64::STNT1D_ZRI:
4411 case AArch64::LDNF1B_IMM:
4412 case AArch64::LDNF1H_IMM:
4413 case AArch64::LDNF1W_IMM:
4414 case AArch64::LDNF1D_IMM:
4415 // A full vector's worth of data
4416 // Width = mbytes * elements
4417 Scale = TypeSize::getScalable(16);
4418 Width = TypeSize::getScalable(16);
4419 MinOffset = -8;
4420 MaxOffset = 7;
4421 break;
4422 case AArch64::LD2B_IMM:
4423 case AArch64::LD2H_IMM:
4424 case AArch64::LD2W_IMM:
4425 case AArch64::LD2D_IMM:
4426 case AArch64::ST2B_IMM:
4427 case AArch64::ST2H_IMM:
4428 case AArch64::ST2W_IMM:
4429 case AArch64::ST2D_IMM:
4430 Scale = TypeSize::getScalable(32);
4431 Width = TypeSize::getScalable(16 * 2);
4432 MinOffset = -8;
4433 MaxOffset = 7;
4434 break;
4435 case AArch64::LD3B_IMM:
4436 case AArch64::LD3H_IMM:
4437 case AArch64::LD3W_IMM:
4438 case AArch64::LD3D_IMM:
4439 case AArch64::ST3B_IMM:
4440 case AArch64::ST3H_IMM:
4441 case AArch64::ST3W_IMM:
4442 case AArch64::ST3D_IMM:
4443 Scale = TypeSize::getScalable(48);
4444 Width = TypeSize::getScalable(16 * 3);
4445 MinOffset = -8;
4446 MaxOffset = 7;
4447 break;
4448 case AArch64::LD4B_IMM:
4449 case AArch64::LD4H_IMM:
4450 case AArch64::LD4W_IMM:
4451 case AArch64::LD4D_IMM:
4452 case AArch64::ST4B_IMM:
4453 case AArch64::ST4H_IMM:
4454 case AArch64::ST4W_IMM:
4455 case AArch64::ST4D_IMM:
4456 Scale = TypeSize::getScalable(64);
4457 Width = TypeSize::getScalable(16 * 4);
4458 MinOffset = -8;
4459 MaxOffset = 7;
4460 break;
4461 case AArch64::LD1B_H_IMM:
4462 case AArch64::LD1SB_H_IMM:
4463 case AArch64::LD1H_S_IMM:
4464 case AArch64::LD1SH_S_IMM:
4465 case AArch64::LD1W_D_IMM:
4466 case AArch64::LD1SW_D_IMM:
4467 case AArch64::ST1B_H_IMM:
4468 case AArch64::ST1H_S_IMM:
4469 case AArch64::ST1W_D_IMM:
4470 case AArch64::LDNF1B_H_IMM:
4471 case AArch64::LDNF1SB_H_IMM:
4472 case AArch64::LDNF1H_S_IMM:
4473 case AArch64::LDNF1SH_S_IMM:
4474 case AArch64::LDNF1W_D_IMM:
4475 case AArch64::LDNF1SW_D_IMM:
4476 // A half vector's worth of data
4477 // Width = mbytes * elements
4478 Scale = TypeSize::getScalable(8);
4479 Width = TypeSize::getScalable(8);
4480 MinOffset = -8;
4481 MaxOffset = 7;
4482 break;
4483 case AArch64::LD1B_S_IMM:
4484 case AArch64::LD1SB_S_IMM:
4485 case AArch64::LD1H_D_IMM:
4486 case AArch64::LD1SH_D_IMM:
4487 case AArch64::ST1B_S_IMM:
4488 case AArch64::ST1H_D_IMM:
4489 case AArch64::LDNF1B_S_IMM:
4490 case AArch64::LDNF1SB_S_IMM:
4491 case AArch64::LDNF1H_D_IMM:
4492 case AArch64::LDNF1SH_D_IMM:
4493 // A quarter vector's worth of data
4494 // Width = mbytes * elements
4495 Scale = TypeSize::getScalable(4);
4496 Width = TypeSize::getScalable(4);
4497 MinOffset = -8;
4498 MaxOffset = 7;
4499 break;
4500 case AArch64::LD1B_D_IMM:
4501 case AArch64::LD1SB_D_IMM:
4502 case AArch64::ST1B_D_IMM:
4503 case AArch64::LDNF1B_D_IMM:
4504 case AArch64::LDNF1SB_D_IMM:
4505 // An eighth vector's worth of data
4506 // Width = mbytes * elements
4507 Scale = TypeSize::getScalable(2);
4508 Width = TypeSize::getScalable(2);
4509 MinOffset = -8;
4510 MaxOffset = 7;
4511 break;
4512 case AArch64::ST2Gi:
4513 case AArch64::ST2GPreIndex:
4514 case AArch64::ST2GPostIndex:
4515 case AArch64::STZ2Gi:
4516 case AArch64::STZ2GPreIndex:
4517 case AArch64::STZ2GPostIndex:
4518 Scale = TypeSize::getFixed(16);
4519 Width = TypeSize::getFixed(32);
4520 MinOffset = -256;
4521 MaxOffset = 255;
4522 break;
4523 case AArch64::STGPi:
4524 case AArch64::STGPpost:
4525 case AArch64::STGPpre:
4526 Scale = TypeSize::getFixed(16);
4527 Width = TypeSize::getFixed(16);
4528 MinOffset = -64;
4529 MaxOffset = 63;
4530 break;
4531 case AArch64::LD1RB_IMM:
4532 case AArch64::LD1RB_H_IMM:
4533 case AArch64::LD1RB_S_IMM:
4534 case AArch64::LD1RB_D_IMM:
4535 case AArch64::LD1RSB_H_IMM:
4536 case AArch64::LD1RSB_S_IMM:
4537 case AArch64::LD1RSB_D_IMM:
4538 Scale = TypeSize::getFixed(1);
4539 Width = TypeSize::getFixed(1);
4540 MinOffset = 0;
4541 MaxOffset = 63;
4542 break;
4543 case AArch64::LD1RH_IMM:
4544 case AArch64::LD1RH_S_IMM:
4545 case AArch64::LD1RH_D_IMM:
4546 case AArch64::LD1RSH_S_IMM:
4547 case AArch64::LD1RSH_D_IMM:
4548 Scale = TypeSize::getFixed(2);
4549 Width = TypeSize::getFixed(2);
4550 MinOffset = 0;
4551 MaxOffset = 63;
4552 break;
4553 case AArch64::LD1RW_IMM:
4554 case AArch64::LD1RW_D_IMM:
4555 case AArch64::LD1RSW_IMM:
4556 Scale = TypeSize::getFixed(4);
4557 Width = TypeSize::getFixed(4);
4558 MinOffset = 0;
4559 MaxOffset = 63;
4560 break;
4561 case AArch64::LD1RD_IMM:
4562 Scale = TypeSize::getFixed(8);
4563 Width = TypeSize::getFixed(8);
4564 MinOffset = 0;
4565 MaxOffset = 63;
4566 break;
4567 }
4568
4569 return true;
4570}
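// Illustrative note: MinOffset and MaxOffset above are in units of Scale,
// not bytes. For LDPXi, Scale = 8 and the legal immediates -64..63 correspond
// to byte offsets -512..504, while Width (16) is the total number of bytes
// the paired access touches.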
4571
4572// Scaling factor for unscaled load or store.
4573int AArch64InstrInfo::getMemScale(unsigned Opc) {
4574 switch (Opc) {
4575 default:
4576 llvm_unreachable("Opcode has unknown scale!");
4577 case AArch64::LDRBBui:
4578 case AArch64::LDURBBi:
4579 case AArch64::LDRSBWui:
4580 case AArch64::LDURSBWi:
4581 case AArch64::STRBBui:
4582 case AArch64::STURBBi:
4583 return 1;
4584 case AArch64::LDRHHui:
4585 case AArch64::LDURHHi:
4586 case AArch64::LDRSHWui:
4587 case AArch64::LDURSHWi:
4588 case AArch64::STRHHui:
4589 case AArch64::STURHHi:
4590 return 2;
4591 case AArch64::LDRSui:
4592 case AArch64::LDURSi:
4593 case AArch64::LDRSpre:
4594 case AArch64::LDRSWui:
4595 case AArch64::LDURSWi:
4596 case AArch64::LDRSWpre:
4597 case AArch64::LDRWpre:
4598 case AArch64::LDRWui:
4599 case AArch64::LDURWi:
4600 case AArch64::STRSui:
4601 case AArch64::STURSi:
4602 case AArch64::STRSpre:
4603 case AArch64::STRWui:
4604 case AArch64::STURWi:
4605 case AArch64::STRWpre:
4606 case AArch64::LDPSi:
4607 case AArch64::LDPSWi:
4608 case AArch64::LDPWi:
4609 case AArch64::STPSi:
4610 case AArch64::STPWi:
4611 return 4;
4612 case AArch64::LDRDui:
4613 case AArch64::LDURDi:
4614 case AArch64::LDRDpre:
4615 case AArch64::LDRXui:
4616 case AArch64::LDURXi:
4617 case AArch64::LDRXpre:
4618 case AArch64::STRDui:
4619 case AArch64::STURDi:
4620 case AArch64::STRDpre:
4621 case AArch64::STRXui:
4622 case AArch64::STURXi:
4623 case AArch64::STRXpre:
4624 case AArch64::LDPDi:
4625 case AArch64::LDPXi:
4626 case AArch64::STPDi:
4627 case AArch64::STPXi:
4628 return 8;
4629 case AArch64::LDRQui:
4630 case AArch64::LDURQi:
4631 case AArch64::STRQui:
4632 case AArch64::STURQi:
4633 case AArch64::STRQpre:
4634 case AArch64::LDPQi:
4635 case AArch64::LDRQpre:
4636 case AArch64::STPQi:
4637 case AArch64::STGi:
4638 case AArch64::STZGi:
4639 case AArch64::ST2Gi:
4640 case AArch64::STZ2Gi:
4641 case AArch64::STGPi:
4642 return 16;
4643 }
4644}
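// Illustrative note: getMemScale gives the access size in bytes, which is
// also the stride used when converting an unscaled (LDUR/STUR-style) byte
// offset into the element offset of the scaled or paired forms, e.g.
// getMemScale(AArch64::LDURXi) == 8, so a byte offset of 24 maps to element
// offset 3 (see scaleOffset further down).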
4645
4646bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
4647 switch (MI.getOpcode()) {
4648 default:
4649 return false;
4650 case AArch64::LDRWpre:
4651 case AArch64::LDRXpre:
4652 case AArch64::LDRSWpre:
4653 case AArch64::LDRSpre:
4654 case AArch64::LDRDpre:
4655 case AArch64::LDRQpre:
4656 return true;
4657 }
4658}
4659
4660bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
4661 switch (MI.getOpcode()) {
4662 default:
4663 return false;
4664 case AArch64::STRWpre:
4665 case AArch64::STRXpre:
4666 case AArch64::STRSpre:
4667 case AArch64::STRDpre:
4668 case AArch64::STRQpre:
4669 return true;
4670 }
4671}
4672
4673bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
4674 return isPreLd(MI) || isPreSt(MI);
4675}
4676
4677bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
4678 switch (MI.getOpcode()) {
4679 default:
4680 return false;
4681 case AArch64::LDPSi:
4682 case AArch64::LDPSWi:
4683 case AArch64::LDPDi:
4684 case AArch64::LDPQi:
4685 case AArch64::LDPWi:
4686 case AArch64::LDPXi:
4687 case AArch64::STPSi:
4688 case AArch64::STPDi:
4689 case AArch64::STPQi:
4690 case AArch64::STPWi:
4691 case AArch64::STPXi:
4692 case AArch64::STGPi:
4693 return true;
4694 }
4695}
4696
4698 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
4699 unsigned Idx =
4701 : 1;
4702 return MI.getOperand(Idx);
4703}
4704
4705const MachineOperand &
4707 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
4708 unsigned Idx =
4710 : 2;
4711 return MI.getOperand(Idx);
4712}
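// Illustrative note: the index arithmetic in the two accessors above follows
// the MI operand layout of these instructions, e.g.
//   LDRXui  x0, x1, 1        ; dst, base, imm           -> base=1, offset=2
//   LDRXpre x1, x0, x1, 16   ; base_wb, dst, base, imm  -> base=2, offset=3
//   LDPXi   x0, x1, x2, 2    ; dst1, dst2, base, imm    -> base=2, offset=3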
4713
4714const MachineOperand &
4716 switch (MI.getOpcode()) {
4717 default:
4718 llvm_unreachable("Unexpected opcode");
4719 case AArch64::LDRBroX:
4720 case AArch64::LDRBBroX:
4721 case AArch64::LDRSBXroX:
4722 case AArch64::LDRSBWroX:
4723 case AArch64::LDRHroX:
4724 case AArch64::LDRHHroX:
4725 case AArch64::LDRSHXroX:
4726 case AArch64::LDRSHWroX:
4727 case AArch64::LDRWroX:
4728 case AArch64::LDRSroX:
4729 case AArch64::LDRSWroX:
4730 case AArch64::LDRDroX:
4731 case AArch64::LDRXroX:
4732 case AArch64::LDRQroX:
4733 return MI.getOperand(4);
4734 }
4735}
4736
4737static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
4738 Register Reg) {
4739 if (MI.getParent() == nullptr)
4740 return nullptr;
4741 const MachineFunction *MF = MI.getParent()->getParent();
4742 return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
4743}
4744
4746 auto IsHFPR = [&](const MachineOperand &Op) {
4747 if (!Op.isReg())
4748 return false;
4749 auto Reg = Op.getReg();
4750 if (Reg.isPhysical())
4751 return AArch64::FPR16RegClass.contains(Reg);
4752 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4753 return TRC == &AArch64::FPR16RegClass ||
4754 TRC == &AArch64::FPR16_loRegClass;
4755 };
4756 return llvm::any_of(MI.operands(), IsHFPR);
4757}
4758
4760 auto IsQFPR = [&](const MachineOperand &Op) {
4761 if (!Op.isReg())
4762 return false;
4763 auto Reg = Op.getReg();
4764 if (Reg.isPhysical())
4765 return AArch64::FPR128RegClass.contains(Reg);
4766 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4767 return TRC == &AArch64::FPR128RegClass ||
4768 TRC == &AArch64::FPR128_loRegClass;
4769 };
4770 return llvm::any_of(MI.operands(), IsQFPR);
4771}
4772
4774 switch (MI.getOpcode()) {
4775 case AArch64::BRK:
4776 case AArch64::HLT:
4777 case AArch64::PACIASP:
4778 case AArch64::PACIBSP:
4779 // Implicit BTI behavior.
4780 return true;
4781 case AArch64::PAUTH_PROLOGUE:
4782 // PAUTH_PROLOGUE expands to PACI(A|B)SP.
4783 return true;
4784 case AArch64::HINT: {
4785 unsigned Imm = MI.getOperand(0).getImm();
4786 // Explicit BTI instruction.
4787 if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
4788 return true;
4789 // PACI(A|B)SP instructions.
4790 if (Imm == 25 || Imm == 27)
4791 return true;
4792 return false;
4793 }
4794 default:
4795 return false;
4796 }
4797}
4798
4799bool AArch64InstrInfo::isFpOrNEON(Register Reg) {
4800 if (Reg == 0)
4801 return false;
4802 assert(Reg.isPhysical() && "Expected physical register in isFpOrNEON");
4803 return AArch64::FPR128RegClass.contains(Reg) ||
4804 AArch64::FPR64RegClass.contains(Reg) ||
4805 AArch64::FPR32RegClass.contains(Reg) ||
4806 AArch64::FPR16RegClass.contains(Reg) ||
4807 AArch64::FPR8RegClass.contains(Reg);
4808}
4809
4811 auto IsFPR = [&](const MachineOperand &Op) {
4812 if (!Op.isReg())
4813 return false;
4814 auto Reg = Op.getReg();
4815 if (Reg.isPhysical())
4816 return isFpOrNEON(Reg);
4817
4818 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
4819 return TRC == &AArch64::FPR128RegClass ||
4820 TRC == &AArch64::FPR128_loRegClass ||
4821 TRC == &AArch64::FPR64RegClass ||
4822 TRC == &AArch64::FPR64_loRegClass ||
4823 TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
4824 TRC == &AArch64::FPR8RegClass;
4825 };
4826 return llvm::any_of(MI.operands(), IsFPR);
4827}
4828
4829// Scale the unscaled offsets. Returns false if the unscaled offset can't be
4830// scaled.
4831static bool scaleOffset(unsigned Opc, int64_t &Offset) {
4832 int Scale = AArch64InstrInfo::getMemScale(Opc);
4833
4834 // If the byte-offset isn't a multiple of the stride, we can't scale this
4835 // offset.
4836 if (Offset % Scale != 0)
4837 return false;
4838
4839 // Convert the byte-offset used by unscaled into an "element" offset used
4840 // by the scaled pair load/store instructions.
4841 Offset /= Scale;
4842 return true;
4843}
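// Worked example (illustrative): for an unscaled 64-bit load such as
//   ldur x0, [x8, #24]          ; LDURXi, byte offset 24
// getMemScale returns 8, so scaleOffset rewrites the offset to 3, i.e. the
// element offset a scaled or paired encoding would use. A byte offset of 20
// would fail the divisibility check and no pair is formed.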
4844
4845static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
4846 if (FirstOpc == SecondOpc)
4847 return true;
4848 // We can also pair sign-ext and zero-ext instructions.
4849 switch (FirstOpc) {
4850 default:
4851 return false;
4852 case AArch64::STRSui:
4853 case AArch64::STURSi:
4854 return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
4855 case AArch64::STRDui:
4856 case AArch64::STURDi:
4857 return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
4858 case AArch64::STRQui:
4859 case AArch64::STURQi:
4860 return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
4861 case AArch64::STRWui:
4862 case AArch64::STURWi:
4863 return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
4864 case AArch64::STRXui:
4865 case AArch64::STURXi:
4866 return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
4867 case AArch64::LDRSui:
4868 case AArch64::LDURSi:
4869 return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
4870 case AArch64::LDRDui:
4871 case AArch64::LDURDi:
4872 return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
4873 case AArch64::LDRQui:
4874 case AArch64::LDURQi:
4875 return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
4876 case AArch64::LDRWui:
4877 case AArch64::LDURWi:
4878 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
4879 case AArch64::LDRSWui:
4880 case AArch64::LDURSWi:
4881 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
4882 case AArch64::LDRXui:
4883 case AArch64::LDURXi:
4884 return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
4885 }
4886 // These instructions can't be paired based on their opcodes.
4887 return false;
4888}
4889
4890static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
4891 int64_t Offset1, unsigned Opcode1, int FI2,
4892 int64_t Offset2, unsigned Opcode2) {
4893 // Accesses through fixed stack object frame indices may access a different
4894 // fixed stack slot. Check that the object offsets plus the access offsets match.
4895 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
4896 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
4897 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
4898 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
4899 // Convert to scaled object offsets.
4900 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
4901 if (ObjectOffset1 % Scale1 != 0)
4902 return false;
4903 ObjectOffset1 /= Scale1;
4904 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
4905 if (ObjectOffset2 % Scale2 != 0)
4906 return false;
4907 ObjectOffset2 /= Scale2;
4908 ObjectOffset1 += Offset1;
4909 ObjectOffset2 += Offset2;
4910 return ObjectOffset1 + 1 == ObjectOffset2;
4911 }
4912
4913 return FI1 == FI2;
4914}
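// Worked example (illustrative): two fixed stack objects at byte offsets 0
// and 8, each accessed with LDRXui and an in-instruction offset of 0, give
// scaled offsets 0 and 1, which are consecutive (0 + 1 == 1), so the accesses
// may be clustered even though their frame indices differ.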
4915
4916/// Detect opportunities for ldp/stp formation.
4917///
4918/// Only called for LdSt for which getMemOperandWithOffset returns true.
4919bool AArch64InstrInfo::shouldClusterMemOps(
4920 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
4921 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
4922 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
4923 unsigned NumBytes) const {
4924 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
4925 const MachineOperand &BaseOp1 = *BaseOps1.front();
4926 const MachineOperand &BaseOp2 = *BaseOps2.front();
4927 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
4928 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
4929 if (BaseOp1.getType() != BaseOp2.getType())
4930 return false;
4931
4932 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
4933 "Only base registers and frame indices are supported.");
4934
4935 // Check for both base regs and base FI.
4936 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
4937 return false;
4938
4939 // Only cluster up to a single pair.
4940 if (ClusterSize > 2)
4941 return false;
4942
4943 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
4944 return false;
4945
4946 // Can we pair these instructions based on their opcodes?
4947 unsigned FirstOpc = FirstLdSt.getOpcode();
4948 unsigned SecondOpc = SecondLdSt.getOpcode();
4949 if (!canPairLdStOpc(FirstOpc, SecondOpc))
4950 return false;
4951
4952 // Can't merge volatiles or load/stores that have a hint to avoid pair
4953 // formation, for example.
4954 if (!isCandidateToMergeOrPair(FirstLdSt) ||
4955 !isCandidateToMergeOrPair(SecondLdSt))
4956 return false;
4957
4958 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
4959 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
4960 if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
4961 return false;
4962
4963 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
4964 if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
4965 return false;
4966
4967 // Pairwise instructions have a 7-bit signed offset field.
4968 if (Offset1 > 63 || Offset1 < -64)
4969 return false;
4970
4971 // The caller should already have ordered First/SecondLdSt by offset.
4972 // Note: this does not hold for non-equal frame index bases.
4973 if (BaseOp1.isFI()) {
4974 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
4975 "Caller should have ordered offsets.");
4976
4977 const MachineFrameInfo &MFI =
4978 FirstLdSt.getParent()->getParent()->getFrameInfo();
4979 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
4980 BaseOp2.getIndex(), Offset2, SecondOpc);
4981 }
4982
4983 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
4984
4985 return Offset1 + 1 == Offset2;
4986}
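// Illustrative example of a pair this hook accepts, assuming both accesses
// also pass the merge-candidate checks:
//   ldr x0, [x8, #8]            ; LDRXui, scaled offset 1
//   ldr x1, [x8, #16]           ; LDRXui, scaled offset 2
// Same base, compatible opcodes, consecutive offsets within [-64, 63], so the
// scheduler may cluster them and the load/store optimizer can later form
//   ldp x0, x1, [x8, #8]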
4987
4989 MCRegister Reg, unsigned SubIdx,
4990 unsigned State,
4991 const TargetRegisterInfo *TRI) {
4992 if (!SubIdx)
4993 return MIB.addReg(Reg, State);
4994
4995 if (Reg.isPhysical())
4996 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
4997 return MIB.addReg(Reg, State, SubIdx);
4998}
4999
5000static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
5001 unsigned NumRegs) {
5002 // We really want the positive remainder mod 32 here, which happens to be
5003 // easily obtainable with a mask.
5004 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
5005}
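// Worked example (illustrative): copying the tuple starting at Q0 into the
// tuple starting at Q1 with NumRegs == 2 gives (1 - 0) & 0x1f == 1 < 2, so a
// forward sub-register copy would overwrite Q1 before it is read as a source;
// copyPhysRegTuple therefore iterates backwards in that case.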
5006
5009 const DebugLoc &DL, MCRegister DestReg,
5010 MCRegister SrcReg, bool KillSrc,
5011 unsigned Opcode,
5012 ArrayRef<unsigned> Indices) const {
5013 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
5015 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
5016 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
5017 unsigned NumRegs = Indices.size();
5018
5019 int SubReg = 0, End = NumRegs, Incr = 1;
5020 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
5021 SubReg = NumRegs - 1;
5022 End = -1;
5023 Incr = -1;
5024 }
5025
5026 for (; SubReg != End; SubReg += Incr) {
5027 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
5028 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
5029 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
5030 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
5031 }
5032}
5033
5036 const DebugLoc &DL, MCRegister DestReg,
5037 MCRegister SrcReg, bool KillSrc,
5038 unsigned Opcode, unsigned ZeroReg,
5039 llvm::ArrayRef<unsigned> Indices) const {
5041 unsigned NumRegs = Indices.size();
5042
5043#ifndef NDEBUG
5044 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
5045 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
5046 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
5047 "GPR reg sequences should not be able to overlap");
5048#endif
5049
5050 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
5051 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
5052 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
5053 MIB.addReg(ZeroReg);
5054 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
5055 MIB.addImm(0);
5056 }
5057}
5058
5061 const DebugLoc &DL, Register DestReg,
5062 Register SrcReg, bool KillSrc,
5063 bool RenamableDest,
5064 bool RenamableSrc) const {
5065 if (AArch64::GPR32spRegClass.contains(DestReg) &&
5066 (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
5068
5069 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
5070 // If either operand is WSP, expand to ADD #0.
5071 if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5072 !Subtarget.hasZeroCycleRegMoveGPR32()) {
5073 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
5074 MCRegister DestRegX = TRI->getMatchingSuperReg(
5075 DestReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
5076 MCRegister SrcRegX = TRI->getMatchingSuperReg(
5077 SrcReg, AArch64::sub_32, &AArch64::GPR64spRegClass);
5078 // This instruction is reading and writing X registers. This may upset
5079 // the register scavenger and machine verifier, so we need to indicate
5080 // that we are reading an undefined value from SrcRegX, but a proper
5081 // value from SrcReg.
5082 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
5083 .addReg(SrcRegX, RegState::Undef)
5084 .addImm(0)
5086 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5087 } else {
5088 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
5089 .addReg(SrcReg, getKillRegState(KillSrc))
5090 .addImm(0)
5092 }
5093 } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGPR32()) {
5094 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
5095 .addImm(0)
5097 } else if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5098 !Subtarget.hasZeroCycleRegMoveGPR32()) {
5099 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
5100 MCRegister DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
5101 &AArch64::GPR64spRegClass);
5102 assert(DestRegX.isValid() && "Destination super-reg not valid");
5103 MCRegister SrcRegX =
5104 SrcReg == AArch64::WZR
5105 ? AArch64::XZR
5106 : TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
5107 &AArch64::GPR64spRegClass);
5108 assert(SrcRegX.isValid() && "Source super-reg not valid");
5109 // This instruction is reading and writing X registers. This may upset
5110 // the register scavenger and machine verifier, so we need to indicate
5111 // that we are reading an undefined value from SrcRegX, but a proper
5112 // value from SrcReg.
5113 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
5114 .addReg(AArch64::XZR)
5115 .addReg(SrcRegX, RegState::Undef)
5116 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5117 } else {
5118 // Otherwise, expand to ORR WZR.
5119 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
5120 .addReg(AArch64::WZR)
5121 .addReg(SrcReg, getKillRegState(KillSrc));
5122 }
5123 return;
5124 }
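// Illustrative summary of the W-register copies emitted above:
//   mov w0, w1   -> orr w0, wzr, w1    ; generic case
//   mov w0, wsp  -> add w0, wsp, #0    ; SP-relative copies must use ADD
//   mov w0, wzr  -> movz w0, #0        ; with zero-cycle GPR32 zeroing
// On subtargets with only 64-bit zero-cycle moves, the same copies are
// widened to their X-register forms as shown above.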
5125
5126 // Copy a Predicate register by ORRing with itself.
5127 if (AArch64::PPRRegClass.contains(DestReg) &&
5128 AArch64::PPRRegClass.contains(SrcReg)) {
5129 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5130 "Unexpected SVE register.");
5131 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
5132 .addReg(SrcReg) // Pg
5133 .addReg(SrcReg)
5134 .addReg(SrcReg, getKillRegState(KillSrc));
5135 return;
5136 }
5137
5138 // Copy a predicate-as-counter register by ORRing with itself as if it
5139 // were a regular predicate (mask) register.
5140 bool DestIsPNR = AArch64::PNRRegClass.contains(DestReg);
5141 bool SrcIsPNR = AArch64::PNRRegClass.contains(SrcReg);
5142 if (DestIsPNR || SrcIsPNR) {
5143 auto ToPPR = [](MCRegister R) -> MCRegister {
5144 return (R - AArch64::PN0) + AArch64::P0;
5145 };
5146 MCRegister PPRSrcReg = SrcIsPNR ? ToPPR(SrcReg) : SrcReg.asMCReg();
5147 MCRegister PPRDestReg = DestIsPNR ? ToPPR(DestReg) : DestReg.asMCReg();
5148
5149 if (PPRSrcReg != PPRDestReg) {
5150 auto NewMI = BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg)
5151 .addReg(PPRSrcReg) // Pg
5152 .addReg(PPRSrcReg)
5153 .addReg(PPRSrcReg, getKillRegState(KillSrc));
5154 if (DestIsPNR)
5155 NewMI.addDef(DestReg, RegState::Implicit);
5156 }
5157 return;
5158 }
5159
5160 // Copy a Z register by ORRing with itself.
5161 if (AArch64::ZPRRegClass.contains(DestReg) &&
5162 AArch64::ZPRRegClass.contains(SrcReg)) {
5163 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5164 "Unexpected SVE register.");
5165 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
5166 .addReg(SrcReg)
5167 .addReg(SrcReg, getKillRegState(KillSrc));
5168 return;
5169 }
5170
5171 // Copy a Z register pair by copying the individual sub-registers.
5172 if ((AArch64::ZPR2RegClass.contains(DestReg) ||
5173 AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
5174 (AArch64::ZPR2RegClass.contains(SrcReg) ||
5175 AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
5176 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5177 "Unexpected SVE register.");
5178 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
5179 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5180 Indices);
5181 return;
5182 }
5183
5184 // Copy a Z register triple by copying the individual sub-registers.
5185 if (AArch64::ZPR3RegClass.contains(DestReg) &&
5186 AArch64::ZPR3RegClass.contains(SrcReg)) {
5187 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5188 "Unexpected SVE register.");
5189 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5190 AArch64::zsub2};
5191 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5192 Indices);
5193 return;
5194 }
5195
5196 // Copy a Z register quad by copying the individual sub-registers.
5197 if ((AArch64::ZPR4RegClass.contains(DestReg) ||
5198 AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
5199 (AArch64::ZPR4RegClass.contains(SrcReg) ||
5200 AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
5201 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5202 "Unexpected SVE register.");
5203 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5204 AArch64::zsub2, AArch64::zsub3};
5205 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5206 Indices);
5207 return;
5208 }
5209
5210 if (AArch64::GPR64spRegClass.contains(DestReg) &&
5211 (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
5212 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
5213 // If either operand is SP, expand to ADD #0.
5214 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
5215 .addReg(SrcReg, getKillRegState(KillSrc))
5216 .addImm(0)
5218 } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGPR64()) {
5219 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
5220 .addImm(0)
5222 } else {
5223 // Otherwise, expand to ORR XZR.
5224 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
5225 .addReg(AArch64::XZR)
5226 .addReg(SrcReg, getKillRegState(KillSrc));
5227 }
5228 return;
5229 }
5230
5231 // Copy a DDDD register quad by copying the individual sub-registers.
5232 if (AArch64::DDDDRegClass.contains(DestReg) &&
5233 AArch64::DDDDRegClass.contains(SrcReg)) {
5234 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5235 AArch64::dsub2, AArch64::dsub3};
5236 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5237 Indices);
5238 return;
5239 }
5240
5241 // Copy a DDD register triple by copying the individual sub-registers.
5242 if (AArch64::DDDRegClass.contains(DestReg) &&
5243 AArch64::DDDRegClass.contains(SrcReg)) {
5244 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5245 AArch64::dsub2};
5246 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5247 Indices);
5248 return;
5249 }
5250
5251 // Copy a DD register pair by copying the individual sub-registers.
5252 if (AArch64::DDRegClass.contains(DestReg) &&
5253 AArch64::DDRegClass.contains(SrcReg)) {
5254 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
5255 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5256 Indices);
5257 return;
5258 }
5259
5260 // Copy a QQQQ register quad by copying the individual sub-registers.
5261 if (AArch64::QQQQRegClass.contains(DestReg) &&
5262 AArch64::QQQQRegClass.contains(SrcReg)) {
5263 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5264 AArch64::qsub2, AArch64::qsub3};
5265 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5266 Indices);
5267 return;
5268 }
5269
5270 // Copy a QQQ register triple by copying the individual sub-registers.
5271 if (AArch64::QQQRegClass.contains(DestReg) &&
5272 AArch64::QQQRegClass.contains(SrcReg)) {
5273 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5274 AArch64::qsub2};
5275 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5276 Indices);
5277 return;
5278 }
5279
5280 // Copy a QQ register pair by copying the individual sub-registers.
5281 if (AArch64::QQRegClass.contains(DestReg) &&
5282 AArch64::QQRegClass.contains(SrcReg)) {
5283 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
5284 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5285 Indices);
5286 return;
5287 }
5288
5289 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
5290 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
5291 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
5292 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
5293 AArch64::XZR, Indices);
5294 return;
5295 }
5296
5297 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
5298 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
5299 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
5300 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
5301 AArch64::WZR, Indices);
5302 return;
5303 }
5304
5305 if (AArch64::FPR128RegClass.contains(DestReg) &&
5306 AArch64::FPR128RegClass.contains(SrcReg)) {
5307 if (Subtarget.isSVEorStreamingSVEAvailable() &&
5308 !Subtarget.isNeonAvailable())
5309 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
5310 .addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
5311 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
5312 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
5313 else if (Subtarget.isNeonAvailable())
5314 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
5315 .addReg(SrcReg)
5316 .addReg(SrcReg, getKillRegState(KillSrc));
5317 else {
5318 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
5319 .addReg(AArch64::SP, RegState::Define)
5320 .addReg(SrcReg, getKillRegState(KillSrc))
5321 .addReg(AArch64::SP)
5322 .addImm(-16);
5323 BuildMI(MBB, I, DL, get(AArch64::LDRQpost))
5324 .addReg(AArch64::SP, RegState::Define)
5325 .addReg(DestReg, RegState::Define)
5326 .addReg(AArch64::SP)
5327 .addImm(16);
5328 }
5329 return;
5330 }
5331
5332 if (AArch64::FPR64RegClass.contains(DestReg) &&
5333 AArch64::FPR64RegClass.contains(SrcReg)) {
5334 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5335 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5336 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5338 MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::dsub,
5339 &AArch64::FPR128RegClass);
5340 MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::dsub,
5341 &AArch64::FPR128RegClass);
5342 // This instruction is reading and writing Q registers. This may upset
5343 // the register scavenger and machine verifier, so we need to indicate
5344 // that we are reading an undefined value from SrcRegQ, but a proper
5345 // value from SrcReg.
5346 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5347 .addReg(SrcRegQ, RegState::Undef)
5348 .addReg(SrcRegQ, RegState::Undef)
5349 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5350 } else {
5351 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
5352 .addReg(SrcReg, getKillRegState(KillSrc));
5353 }
5354 return;
5355 }
5356
5357 if (AArch64::FPR32RegClass.contains(DestReg) &&
5358 AArch64::FPR32RegClass.contains(SrcReg)) {
5359 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5360 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5361 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5363 MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::ssub,
5364 &AArch64::FPR128RegClass);
5365 MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::ssub,
5366 &AArch64::FPR128RegClass);
5367 // This instruction is reading and writing Q registers. This may upset
5368 // the register scavenger and machine verifier, so we need to indicate
5369 // that we are reading an undefined value from SrcRegQ, but a proper
5370 // value from SrcReg.
5371 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5372 .addReg(SrcRegQ, RegState::Undef)
5373 .addReg(SrcRegQ, RegState::Undef)
5374 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5375 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5376 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5378 MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::ssub,
5379 &AArch64::FPR64RegClass);
5380 MCRegister SrcRegD = TRI->getMatchingSuperReg(SrcReg, AArch64::ssub,
5381 &AArch64::FPR64RegClass);
5382 // This instruction is reading and writing D registers. This may upset
5383 // the register scavenger and machine verifier, so we need to indicate
5384 // that we are reading an undefined value from SrcRegD, but a proper
5385 // value from SrcReg.
5386 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5387 .addReg(SrcRegD, RegState::Undef)
5388 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5389 } else {
5390 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5391 .addReg(SrcReg, getKillRegState(KillSrc));
5392 }
5393 return;
5394 }
5395
5396 if (AArch64::FPR16RegClass.contains(DestReg) &&
5397 AArch64::FPR16RegClass.contains(SrcReg)) {
5398 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5399 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5400 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5402 MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::hsub,
5403 &AArch64::FPR128RegClass);
5404 MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::hsub,
5405 &AArch64::FPR128RegClass);
5406 // This instruction is reading and writing Q registers. This may upset
5407 // the register scavenger and machine verifier, so we need to indicate
5408 // that we are reading an undefined value from SrcRegQ, but a proper
5409 // value from SrcReg.
5410 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5411 .addReg(SrcRegQ, RegState::Undef)
5412 .addReg(SrcRegQ, RegState::Undef)
5413 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5414 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5415 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5417 MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::hsub,
5418 &AArch64::FPR64RegClass);
5419 MCRegister SrcRegD = TRI->getMatchingSuperReg(SrcReg, AArch64::hsub,
5420 &AArch64::FPR64RegClass);
5421 // This instruction is reading and writing D registers. This may upset
5422 // the register scavenger and machine verifier, so we need to indicate
5423 // that we are reading an undefined value from SrcRegD, but a proper
5424 // value from SrcReg.
5425 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5426 .addReg(SrcRegD, RegState::Undef)
5427 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5428 } else {
5429 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
5430 &AArch64::FPR32RegClass);
5431 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
5432 &AArch64::FPR32RegClass);
5433 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5434 .addReg(SrcReg, getKillRegState(KillSrc));
5435 }
5436 return;
5437 }
5438
5439 if (AArch64::FPR8RegClass.contains(DestReg) &&
5440 AArch64::FPR8RegClass.contains(SrcReg)) {
5441 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5442 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5443         !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5445 MCRegister DestRegQ = TRI->getMatchingSuperReg(DestReg, AArch64::bsub,
5446 &AArch64::FPR128RegClass);
5447 MCRegister SrcRegQ = TRI->getMatchingSuperReg(SrcReg, AArch64::bsub,
5448 &AArch64::FPR128RegClass);
5449 // This instruction is reading and writing Q registers. This may upset
5450 // the register scavenger and machine verifier, so we need to indicate
5451 // that we are reading an undefined value from SrcRegQ, but a proper
5452 // value from SrcReg.
5453 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5454 .addReg(SrcRegQ, RegState::Undef)
5455 .addReg(SrcRegQ, RegState::Undef)
5456 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5457 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5458 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5460 MCRegister DestRegD = TRI->getMatchingSuperReg(DestReg, AArch64::bsub,
5461 &AArch64::FPR64RegClass);
5462 MCRegister SrcRegD = TRI->getMatchingSuperReg(SrcReg, AArch64::bsub,
5463 &AArch64::FPR64RegClass);
5464 // This instruction is reading and writing D registers. This may upset
5465 // the register scavenger and machine verifier, so we need to indicate
5466 // that we are reading an undefined value from SrcRegD, but a proper
5467 // value from SrcReg.
5468 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5469 .addReg(SrcRegD, RegState::Undef)
5470 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5471 } else {
5472 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
5473 &AArch64::FPR32RegClass);
5474 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
5475 &AArch64::FPR32RegClass);
5476 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5477 .addReg(SrcReg, getKillRegState(KillSrc));
5478 }
5479 return;
5480 }
5481
5482 // Copies between GPR64 and FPR64.
5483 if (AArch64::FPR64RegClass.contains(DestReg) &&
5484 AArch64::GPR64RegClass.contains(SrcReg)) {
5485 if (AArch64::XZR == SrcReg) {
5486 BuildMI(MBB, I, DL, get(AArch64::FMOVD0), DestReg);
5487 } else {
5488 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
5489 .addReg(SrcReg, getKillRegState(KillSrc));
5490 }
5491 return;
5492 }
5493 if (AArch64::GPR64RegClass.contains(DestReg) &&
5494 AArch64::FPR64RegClass.contains(SrcReg)) {
5495 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
5496 .addReg(SrcReg, getKillRegState(KillSrc));
5497 return;
5498 }
5499 // Copies between GPR32 and FPR32.
5500 if (AArch64::FPR32RegClass.contains(DestReg) &&
5501 AArch64::GPR32RegClass.contains(SrcReg)) {
5502 if (AArch64::WZR == SrcReg) {
5503 BuildMI(MBB, I, DL, get(AArch64::FMOVS0), DestReg);
5504 } else {
5505 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
5506 .addReg(SrcReg, getKillRegState(KillSrc));
5507 }
5508 return;
5509 }
5510 if (AArch64::GPR32RegClass.contains(DestReg) &&
5511 AArch64::FPR32RegClass.contains(SrcReg)) {
5512 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
5513 .addReg(SrcReg, getKillRegState(KillSrc));
5514 return;
5515 }
5516
5517 if (DestReg == AArch64::NZCV) {
5518 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
5519 BuildMI(MBB, I, DL, get(AArch64::MSR))
5520 .addImm(AArch64SysReg::NZCV)
5521 .addReg(SrcReg, getKillRegState(KillSrc))
5522 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
5523 return;
5524 }
5525
5526 if (SrcReg == AArch64::NZCV) {
5527 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
5528 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
5529 .addImm(AArch64SysReg::NZCV)
5530 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
5531 return;
5532 }
5533
5534#ifndef NDEBUG
5536 errs() << TRI.getRegAsmName(DestReg) << " = COPY "
5537 << TRI.getRegAsmName(SrcReg) << "\n";
5538#endif
5539 llvm_unreachable("unimplemented reg-to-reg copy");
5540}
5541
5544 MachineBasicBlock::iterator InsertBefore,
5545 const MCInstrDesc &MCID,
5546 Register SrcReg, bool IsKill,
5547 unsigned SubIdx0, unsigned SubIdx1, int FI,
5548 MachineMemOperand *MMO) {
5549 Register SrcReg0 = SrcReg;
5550 Register SrcReg1 = SrcReg;
5551 if (SrcReg.isPhysical()) {
5552 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
5553 SubIdx0 = 0;
5554 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
5555 SubIdx1 = 0;
5556 }
5557 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
5558 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
5559 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
5560 .addFrameIndex(FI)
5561 .addImm(0)
5562 .addMemOperand(MMO);
5563}
5564
5567 Register SrcReg, bool isKill, int FI,
5568 const TargetRegisterClass *RC,
5569 const TargetRegisterInfo *TRI,
5570 Register VReg,
5571 MachineInstr::MIFlag Flags) const {
5572 MachineFunction &MF = *MBB.getParent();
5573 MachineFrameInfo &MFI = MF.getFrameInfo();
5574
5576 MachineMemOperand *MMO =
5578 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
5579 unsigned Opc = 0;
5580 bool Offset = true;
5582 unsigned StackID = TargetStackID::Default;
5583 switch (TRI->getSpillSize(*RC)) {
5584 case 1:
5585 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
5586 Opc = AArch64::STRBui;
5587 break;
5588 case 2: {
5589 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
5590 Opc = AArch64::STRHui;
5591 else if (AArch64::PNRRegClass.hasSubClassEq(RC) ||
5592 AArch64::PPRRegClass.hasSubClassEq(RC)) {
5593 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5594 "Unexpected register store without SVE store instructions");
5595 Opc = AArch64::STR_PXI;
5597 }
5598 break;
5599 }
5600 case 4:
5601 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5602 Opc = AArch64::STRWui;
5603 if (SrcReg.isVirtual())
5604 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
5605 else
5606 assert(SrcReg != AArch64::WSP);
5607 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5608 Opc = AArch64::STRSui;
5609 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5610 Opc = AArch64::STR_PPXI;
5612 }
5613 break;
5614 case 8:
5615 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5616 Opc = AArch64::STRXui;
5617 if (SrcReg.isVirtual())
5618 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
5619 else
5620 assert(SrcReg != AArch64::SP);
5621 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5622 Opc = AArch64::STRDui;
5623 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5625 get(AArch64::STPWi), SrcReg, isKill,
5626 AArch64::sube32, AArch64::subo32, FI, MMO);
5627 return;
5628 }
5629 break;
5630 case 16:
5631 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5632 Opc = AArch64::STRQui;
5633 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5634 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5635 Opc = AArch64::ST1Twov1d;
5636 Offset = false;
5637 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5639 get(AArch64::STPXi), SrcReg, isKill,
5640 AArch64::sube64, AArch64::subo64, FI, MMO);
5641 return;
5642 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5643 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5644 "Unexpected register store without SVE store instructions");
5645 Opc = AArch64::STR_ZXI;
5647 }
5648 break;
5649 case 24:
5650 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5651 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5652 Opc = AArch64::ST1Threev1d;
5653 Offset = false;
5654 }
5655 break;
5656 case 32:
5657 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5658 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5659 Opc = AArch64::ST1Fourv1d;
5660 Offset = false;
5661 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5662 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5663 Opc = AArch64::ST1Twov2d;
5664 Offset = false;
5665 } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5666 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5667 "Unexpected register store without SVE store instructions");
5668 Opc = AArch64::STR_ZZXI_STRIDED_CONTIGUOUS;
5670 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
5671 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5672 "Unexpected register store without SVE store instructions");
5673 Opc = AArch64::STR_ZZXI;
5675 }
5676 break;
5677 case 48:
5678 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5679 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5680 Opc = AArch64::ST1Threev2d;
5681 Offset = false;
5682 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5683 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5684 "Unexpected register store without SVE store instructions");
5685 Opc = AArch64::STR_ZZZXI;
5687 }
5688 break;
5689 case 64:
5690 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5691 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5692 Opc = AArch64::ST1Fourv2d;
5693 Offset = false;
5694 } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5695 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5696 "Unexpected register store without SVE store instructions");
5697 Opc = AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS;
5699 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
5700 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5701 "Unexpected register store without SVE store instructions");
5702 Opc = AArch64::STR_ZZZZXI;
5704 }
5705 break;
5706 }
5707 assert(Opc && "Unknown register class");
5708 MFI.setStackID(FI, StackID);
5709
5711 .addReg(SrcReg, getKillRegState(isKill))
5712 .addFrameIndex(FI);
5713
5714 if (Offset)
5715 MI.addImm(0);
5716 if (PNRReg.isValid())
5717 MI.addDef(PNRReg, RegState::Implicit);
5718 MI.addMemOperand(MMO);
5719}
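
// Illustrative examples (not exhaustive; opcode choice follows the switch
// above):
//  - Spilling a virtual GPR64 produces an immediate-offset store, e.g.
//      STRXui %0, %stack.0, 0 :: (store (s64) into %stack.0)
//  - Spilling an XSeqPairs pair is split by storeRegPairToStackSlot into a
//    single STPXi of its sube64/subo64 halves.
//  - Structured NEON tuples (DD/QQ/QQQ/...) use ST1* opcodes, which take no
//    immediate offset (Offset == false above).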
5720
5723 MachineBasicBlock::iterator InsertBefore,
5724 const MCInstrDesc &MCID,
5725 Register DestReg, unsigned SubIdx0,
5726 unsigned SubIdx1, int FI,
5727 MachineMemOperand *MMO) {
5728 Register DestReg0 = DestReg;
5729 Register DestReg1 = DestReg;
5730 bool IsUndef = true;
5731 if (DestReg.isPhysical()) {
5732 DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
5733 SubIdx0 = 0;
5734 DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
5735 SubIdx1 = 0;
5736 IsUndef = false;
5737 }
5738 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
5739 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
5740 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
5741 .addFrameIndex(FI)
5742 .addImm(0)
5743 .addMemOperand(MMO);
5744}
5745
5748 int FI, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI,
5749 Register VReg, MachineInstr::MIFlag Flags) const {
5750 MachineFunction &MF = *MBB.getParent();
5751 MachineFrameInfo &MFI = MF.getFrameInfo();
5753 MachineMemOperand *MMO =
5755 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
5756
5757 unsigned Opc = 0;
5758 bool Offset = true;
5759 unsigned StackID = TargetStackID::Default;
5761 switch (TRI->getSpillSize(*RC)) {
5762 case 1:
5763 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
5764 Opc = AArch64::LDRBui;
5765 break;
5766 case 2: {
5767 bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
5768 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
5769 Opc = AArch64::LDRHui;
5770 else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
5771 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5772 "Unexpected register load without SVE load instructions");
5773 if (IsPNR)
5774 PNRReg = DestReg;
5775 Opc = AArch64::LDR_PXI;
5777 }
5778 break;
5779 }
5780 case 4:
5781 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5782 Opc = AArch64::LDRWui;
5783 if (DestReg.isVirtual())
5784 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
5785 else
5786 assert(DestReg != AArch64::WSP);
5787 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5788 Opc = AArch64::LDRSui;
5789 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5790 Opc = AArch64::LDR_PPXI;
5792 }
5793 break;
5794 case 8:
5795 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5796 Opc = AArch64::LDRXui;
5797 if (DestReg.isVirtual())
5798 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
5799 else
5800 assert(DestReg != AArch64::SP);
5801 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5802 Opc = AArch64::LDRDui;
5803 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5805 get(AArch64::LDPWi), DestReg, AArch64::sube32,
5806 AArch64::subo32, FI, MMO);
5807 return;
5808 }
5809 break;
5810 case 16:
5811 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5812 Opc = AArch64::LDRQui;
5813 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5814 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5815 Opc = AArch64::LD1Twov1d;
5816 Offset = false;
5817 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5819 get(AArch64::LDPXi), DestReg, AArch64::sube64,
5820 AArch64::subo64, FI, MMO);
5821 return;
5822 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5823 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5824 "Unexpected register load without SVE load instructions");
5825 Opc = AArch64::LDR_ZXI;
5827 }
5828 break;
5829 case 24:
5830 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5831 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5832 Opc = AArch64::LD1Threev1d;
5833 Offset = false;
5834 }
5835 break;
5836 case 32:
5837 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5838 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5839 Opc = AArch64::LD1Fourv1d;
5840 Offset = false;
5841 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5842 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5843 Opc = AArch64::LD1Twov2d;
5844 Offset = false;
5845 } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5846 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5847 "Unexpected register load without SVE load instructions");
5848 Opc = AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS;
5850 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
5851 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5852 "Unexpected register load without SVE load instructions");
5853 Opc = AArch64::LDR_ZZXI;
5855 }
5856 break;
5857 case 48:
5858 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5859 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5860 Opc = AArch64::LD1Threev2d;
5861 Offset = false;
5862 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5863 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5864 "Unexpected register load without SVE load instructions");
5865 Opc = AArch64::LDR_ZZZXI;
5867 }
5868 break;
5869 case 64:
5870 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5871 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
5872 Opc = AArch64::LD1Fourv2d;
5873 Offset = false;
5874 } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5875 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5876 "Unexpected register load without SVE load instructions");
5877 Opc = AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS;
5879 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
5880 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5881 "Unexpected register load without SVE load instructions");
5882 Opc = AArch64::LDR_ZZZZXI;
5884 }
5885 break;
5886 }
5887
5888 assert(Opc && "Unknown register class");
5889 MFI.setStackID(FI, StackID);
5890
5892 .addReg(DestReg, getDefRegState(true))
5893 .addFrameIndex(FI);
5894 if (Offset)
5895 MI.addImm(0);
5896 if (PNRReg.isValid() && !PNRReg.isVirtual())
5897 MI.addDef(PNRReg, RegState::Implicit);
5898 MI.addMemOperand(MMO);
5899}
5900
5902 const MachineInstr &UseMI,
5903 const TargetRegisterInfo *TRI) {
5904 return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
5905 UseMI.getIterator()),
5906 [TRI](const MachineInstr &I) {
5907 return I.modifiesRegister(AArch64::NZCV, TRI) ||
5908 I.readsRegister(AArch64::NZCV, TRI);
5909 });
5910}
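
// Note (illustrative): this checks only the instructions strictly between
// DefMI and UseMI; DefMI and UseMI themselves are not inspected. Callers use
// it to make sure NZCV is neither read nor clobbered in that range before
// rewriting flag-dependent code.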
5911
5912void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
5913 const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
5914 // The smallest scalable element supported by scaled SVE addressing
5915 // modes is the predicate, which is 2 scalable bytes in size. So the scalable
5916 // byte offset must always be a multiple of 2.
5917 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5918
5919 // VGSized offsets are divided by '2', because the VG register is the
5920 // number of 64-bit granules as opposed to 128-bit vector chunks,
5921 // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
5922 // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
5923 // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
5924 ByteSized = Offset.getFixed();
5925 VGSized = Offset.getScalable() / 2;
5926}
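
// Worked example (illustrative): for a StackOffset of 16 fixed bytes plus 34
// scalable bytes, ByteSized = 16 and VGSized = 17, i.e. the scalable part of
// the DWARF offset is expressed as 17 * VG bytes (VG being the number of
// 64-bit granules in a vector).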
5927
5928 /// Decomposes this frame offset into the parts needed to materialize it:
5929 /// a fixed byte count plus scalable predicate- and data-vector counts.
5930 /// For non-scalable offsets this is simply the byte size.
5931void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
5932 const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
5933 int64_t &NumDataVectors) {
5934 // The smallest scalable element supported by scaled SVE addressing
5935 // modes is the predicate, which is 2 scalable bytes in size. So the scalable
5936 // byte offset must always be a multiple of 2.
5937 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
5938
5939 NumBytes = Offset.getFixed();
5940 NumDataVectors = 0;
5941 NumPredicateVectors = Offset.getScalable() / 2;
5942 // This method is used to get the offsets to adjust the frame offset.
5943 // If the function requires ADDPL to be used and needs more than two ADDPL
5944 // instructions, part of the offset is folded into NumDataVectors so that it
5945 // uses ADDVL for part of it, reducing the number of ADDPL instructions.
5946 if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
5947 NumPredicateVectors > 62) {
5948 NumDataVectors = NumPredicateVectors / 8;
5949 NumPredicateVectors -= NumDataVectors * 8;
5950 }
5951}
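
// Worked examples (illustrative):
//  - Scalable offset 16  -> NumPredicateVectors = 8, a multiple of 8, so it
//    folds into NumDataVectors = 1 (a single ADDVL #1).
//  - Scalable offset 146 -> 73 predicate vectors, which exceeds 62, so it is
//    split into NumDataVectors = 9 and NumPredicateVectors = 1
//    (ADDVL #9 followed by ADDPL #1).
//  - Scalable offset 6   -> NumPredicateVectors = 3, left as a single ADDPL #3.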
5952
5953// Convenience function to create a DWARF expression for: Constant `Operation`.
5954 // This helper emits compact sequences for common cases. For example, for `-15
5955// DW_OP_plus`, this helper would create DW_OP_lit15 DW_OP_minus.
5958 if (Operation == dwarf::DW_OP_plus && Constant < 0 && -Constant <= 31) {
5959 // -Constant (1 to 31)
5960 Expr.push_back(dwarf::DW_OP_lit0 - Constant);
5961 Operation = dwarf::DW_OP_minus;
5962 } else if (Constant >= 0 && Constant <= 31) {
5963 // Literal value 0 to 31
5964 Expr.push_back(dwarf::DW_OP_lit0 + Constant);
5965 } else {
5966 // Signed constant
5967 Expr.push_back(dwarf::DW_OP_consts);
5969 }
5970 return Expr.push_back(Operation);
5971}
5972
5973// Convenience function to create a DWARF expression for a register.
5974static void appendReadRegExpr(SmallVectorImpl<char> &Expr, unsigned RegNum) {
5975 Expr.push_back((char)dwarf::DW_OP_bregx);
5977 Expr.push_back(0);
5978}
5979
5980 // Convenience function to create a DWARF expression that loads a value
5981 // from memory at the given offset from the CFA.
5983 int64_t OffsetFromDefCFA) {
5984 // This assumes the top of the DWARF stack contains the CFA.
5985 Expr.push_back(dwarf::DW_OP_dup);
5986 // Add the offset to the register.
5987 appendConstantExpr(Expr, OffsetFromDefCFA, dwarf::DW_OP_plus);
5988 // Dereference the address (loads a 64-bit value).
5989 Expr.push_back(dwarf::DW_OP_deref);
5990}
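
// Illustrative byte sequences produced by the helpers above (assuming the
// usual DWARF opcode encodings):
//  - appendConstantExpr(Expr, 8, DW_OP_mul)    -> DW_OP_lit8, DW_OP_mul
//  - appendConstantExpr(Expr, -15, DW_OP_plus) -> DW_OP_lit15, DW_OP_minus
//  - appendConstantExpr(Expr, 34, DW_OP_mul)   -> DW_OP_consts 34, DW_OP_mul
//  - appendReadRegExpr(Expr, VG)               -> DW_OP_bregx VG, 0
//  - appendLoadRegExpr(Expr, Off)              -> DW_OP_dup, (+ Off), DW_OP_deref
//    i.e. it duplicates the CFA on top of the expression stack and then
//    dereferences CFA + Off.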
5991
5992// Convenience function to create a comment for
5993// (+/-) NumBytes (* RegScale)?
5994static void appendOffsetComment(int NumBytes, llvm::raw_string_ostream &Comment,
5995 StringRef RegScale = {}) {
5996 if (NumBytes) {
5997 Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
5998 if (!RegScale.empty())
5999 Comment << ' ' << RegScale;
6000 }
6001}
6002
6003// Creates an MCCFIInstruction:
6004// { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
6006 unsigned Reg,
6007 const StackOffset &Offset) {
6008 int64_t NumBytes, NumVGScaledBytes;
6009 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(Offset, NumBytes,
6010 NumVGScaledBytes);
6011 std::string CommentBuffer;
6012 llvm::raw_string_ostream Comment(CommentBuffer);
6013
6014 if (Reg == AArch64::SP)
6015 Comment << "sp";
6016 else if (Reg == AArch64::FP)
6017 Comment << "fp";
6018 else
6019 Comment << printReg(Reg, &TRI);
6020
6021 // Build up the expression (Reg + NumBytes + VG * NumVGScaledBytes)
6022 SmallString<64> Expr;
6023 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6024 assert(DwarfReg <= 31 && "DwarfReg out of bounds (0..31)");
6025 // Reg + NumBytes
6026 Expr.push_back(dwarf::DW_OP_breg0 + DwarfReg);
6027 appendLEB128<LEB128Sign::Signed>(Expr, NumBytes);
6028 appendOffsetComment(NumBytes, Comment);
6029 if (NumVGScaledBytes) {
6030 // + VG * NumVGScaledBytes
6031 appendOffsetComment(NumVGScaledBytes, Comment, "* VG");
6032 appendReadRegExpr(Expr, TRI.getDwarfRegNum(AArch64::VG, true));
6033 appendConstantExpr(Expr, NumVGScaledBytes, dwarf::DW_OP_mul);
6034 Expr.push_back(dwarf::DW_OP_plus);
6035 }
6036
6037 // Wrap this into DW_CFA_def_cfa.
6038 SmallString<64> DefCfaExpr;
6039 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
6040 appendLEB128<LEB128Sign::Unsigned>(DefCfaExpr, Expr.size());
6041 DefCfaExpr.append(Expr.str());
6042 return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
6043 Comment.str());
6044}
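
// Illustrative result: for Reg == SP and an Offset of 16 fixed plus 16
// scalable bytes (NumBytes = 16, NumVGScaledBytes = 8), the escape encodes
//   DW_CFA_def_cfa_expression <len>:
//     DW_OP_breg31 +16, DW_OP_bregx VG 0, DW_OP_lit8, DW_OP_mul, DW_OP_plus
// with the asm comment "sp + 16 + 8 * VG". The DWARF number used for VG is
// whatever TRI.getDwarfRegNum(AArch64::VG, true) returns.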
6045
6047 unsigned FrameReg, unsigned Reg,
6048 const StackOffset &Offset,
6049 bool LastAdjustmentWasScalable) {
6050 if (Offset.getScalable())
6051 return createDefCFAExpression(TRI, Reg, Offset);
6052
6053 if (FrameReg == Reg && !LastAdjustmentWasScalable)
6054 return MCCFIInstruction::cfiDefCfaOffset(nullptr, int(Offset.getFixed()));
6055
6056 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6057 return MCCFIInstruction::cfiDefCfa(nullptr, DwarfReg, (int)Offset.getFixed());
6058}
6059
6062 const StackOffset &OffsetFromDefCFA,
6063 std::optional<int64_t> IncomingVGOffsetFromDefCFA) {
6064 int64_t NumBytes, NumVGScaledBytes;
6065 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
6066 OffsetFromDefCFA, NumBytes, NumVGScaledBytes);
6067
6068 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6069
6070 // Non-scalable offsets can use DW_CFA_offset directly.
6071 if (!NumVGScaledBytes)
6072 return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);
6073
6074 std::string CommentBuffer;
6075 llvm::raw_string_ostream Comment(CommentBuffer);
6076 Comment << printReg(Reg, &TRI) << " @ cfa";
6077
6078 // Build up expression (CFA + VG * NumVGScaledBytes + NumBytes)
6079 assert(NumVGScaledBytes && "Expected scalable offset");
6080 SmallString<64> OffsetExpr;
6081 // + VG * NumVGScaledBytes
6082 StringRef VGRegScale;
6083 if (IncomingVGOffsetFromDefCFA) {
6084 appendLoadRegExpr(OffsetExpr, *IncomingVGOffsetFromDefCFA);
6085 VGRegScale = "* IncomingVG";
6086 } else {
6087 appendReadRegExpr(OffsetExpr, TRI.getDwarfRegNum(AArch64::VG, true));
6088 VGRegScale = "* VG";
6089 }
6090 appendConstantExpr(OffsetExpr, NumVGScaledBytes, dwarf::DW_OP_mul);
6091 appendOffsetComment(NumVGScaledBytes, Comment, VGRegScale);
6092 OffsetExpr.push_back(dwarf::DW_OP_plus);
6093 if (NumBytes) {
6094 // + NumBytes
6095 appendOffsetComment(NumBytes, Comment);
6096 appendConstantExpr(OffsetExpr, NumBytes, dwarf::DW_OP_plus);
6097 }
6098
6099 // Wrap this into DW_CFA_expression
6100 SmallString<64> CfaExpr;
6101 CfaExpr.push_back(dwarf::DW_CFA_expression);
6102 appendLEB128<LEB128Sign::Unsigned>(CfaExpr, DwarfReg);
6103 appendLEB128<LEB128Sign::Unsigned>(CfaExpr, OffsetExpr.size());
6104 CfaExpr.append(OffsetExpr.str());
6105
6106 return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), SMLoc(),
6107 Comment.str());
6108}
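
// Illustrative result: for a callee-save described by an OffsetFromDefCFA of
// -16 fixed and -16 scalable bytes (NumBytes = -16, NumVGScaledBytes = -8)
// and no IncomingVGOffsetFromDefCFA, the escape encodes
//   DW_CFA_expression <reg> <len>:
//     DW_OP_bregx VG 0, DW_OP_consts -8, DW_OP_mul, DW_OP_plus,
//     DW_OP_lit16, DW_OP_minus
// with an asm comment along the lines of "$z8 @ cfa - 8 * VG - 16".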
6109
6110 // Helper function to emit a frame offset adjustment from a given
6111 // pointer (SrcReg), writing the result to DestReg. This function is explicit
6112 // in that it requires the opcode.
6115 const DebugLoc &DL, unsigned DestReg,
6116 unsigned SrcReg, int64_t Offset, unsigned Opc,
6117 const TargetInstrInfo *TII,
6118 MachineInstr::MIFlag Flag, bool NeedsWinCFI,
6119 bool *HasWinCFI, bool EmitCFAOffset,
6120 StackOffset CFAOffset, unsigned FrameReg) {
6121 int Sign = 1;
6122 unsigned MaxEncoding, ShiftSize;
6123 switch (Opc) {
6124 case AArch64::ADDXri:
6125 case AArch64::ADDSXri:
6126 case AArch64::SUBXri:
6127 case AArch64::SUBSXri:
6128 MaxEncoding = 0xfff;
6129 ShiftSize = 12;
6130 break;
6131 case AArch64::ADDVL_XXI:
6132 case AArch64::ADDPL_XXI:
6133 case AArch64::ADDSVL_XXI:
6134 case AArch64::ADDSPL_XXI:
6135 MaxEncoding = 31;
6136 ShiftSize = 0;
6137 if (Offset < 0) {
6138 MaxEncoding = 32;
6139 Sign = -1;
6140 Offset = -Offset;
6141 }
6142 break;
6143 default:
6144 llvm_unreachable("Unsupported opcode");
6145 }
6146
6147 // `Offset` can be in bytes or in "scalable bytes".
6148 int VScale = 1;
6149 if (Opc == AArch64::ADDVL_XXI || Opc == AArch64::ADDSVL_XXI)
6150 VScale = 16;
6151 else if (Opc == AArch64::ADDPL_XXI || Opc == AArch64::ADDSPL_XXI)
6152 VScale = 2;
6153
6154 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
6155 // scratch register. If DestReg is a virtual register, use it as the
6156 // scratch register; otherwise, create a new virtual register (to be
6157 // replaced by the scavenger at the end of PEI). That case can be optimized
6158 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
6159 // register can be loaded with offset%8 and the add/sub can use an extending
6160 // instruction with LSL#3.
6161 // Currently the function handles any offsets but generates a poor sequence
6162 // of code.
6163 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
6164
6165 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
6166 Register TmpReg = DestReg;
6167 if (TmpReg == AArch64::XZR)
6168 TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
6169 &AArch64::GPR64RegClass);
6170 do {
6171 uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
6172 unsigned LocalShiftSize = 0;
6173 if (ThisVal > MaxEncoding) {
6174 ThisVal = ThisVal >> ShiftSize;
6175 LocalShiftSize = ShiftSize;
6176 }
6177 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
6178 "Encoding cannot handle value that big");
6179
6180 Offset -= ThisVal << LocalShiftSize;
6181 if (Offset == 0)
6182 TmpReg = DestReg;
6183 auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
6184 .addReg(SrcReg)
6185 .addImm(Sign * (int)ThisVal);
6186 if (ShiftSize)
6187 MBI = MBI.addImm(
6189 MBI = MBI.setMIFlag(Flag);
6190
6191 auto Change =
6192 VScale == 1
6193 ? StackOffset::getFixed(ThisVal << LocalShiftSize)
6194 : StackOffset::getScalable(VScale * (ThisVal << LocalShiftSize));
6195 if (Sign == -1 || Opc == AArch64::SUBXri || Opc == AArch64::SUBSXri)
6196 CFAOffset += Change;
6197 else
6198 CFAOffset -= Change;
6199 if (EmitCFAOffset && DestReg == TmpReg) {
6200 MachineFunction &MF = *MBB.getParent();
6201 const TargetSubtargetInfo &STI = MF.getSubtarget();
6202 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
6203
6204 unsigned CFIIndex = MF.addFrameInst(
6205 createDefCFA(TRI, FrameReg, DestReg, CFAOffset, VScale != 1));
6206 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
6207 .addCFIIndex(CFIIndex)
6208 .setMIFlags(Flag);
6209 }
6210
6211 if (NeedsWinCFI) {
6212 int Imm = (int)(ThisVal << LocalShiftSize);
6213 if (VScale != 1 && DestReg == AArch64::SP) {
6214 if (HasWinCFI)
6215 *HasWinCFI = true;
6216 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AllocZ))
6217 .addImm(ThisVal)
6218 .setMIFlag(Flag);
6219 } else if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
6220 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
6221 assert(VScale == 1 && "Expected non-scalable operation");
6222 if (HasWinCFI)
6223 *HasWinCFI = true;
6224 if (Imm == 0)
6225 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
6226 else
6227 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
6228 .addImm(Imm)
6229 .setMIFlag(Flag);
6230 assert(Offset == 0 && "Expected remaining offset to be zero to "
6231 "emit a single SEH directive");
6232 } else if (DestReg == AArch64::SP) {
6233 assert(VScale == 1 && "Expected non-scalable operation");
6234 if (HasWinCFI)
6235 *HasWinCFI = true;
6236 assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
6237 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
6238 .addImm(Imm)
6239 .setMIFlag(Flag);
6240 }
6241 }
6242
6243 SrcReg = TmpReg;
6244 } while (Offset);
6245}
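
// Worked example (illustrative): emitting DestReg = SrcReg + 0x12345 with
// Opc == ADDXri (MaxEncoding = 0xfff, ShiftSize = 12) takes two iterations of
// the loop above:
//   add Dest, Src,  #0x12, lsl #12   ; adds 0x12000, leaving 0x345
//   add Dest, Dest, #0x345           ; adds the remainder
// A CFI def_cfa update (if requested) is emitted after each instruction that
// writes the final DestReg.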
6246
6249 unsigned DestReg, unsigned SrcReg,
6251 MachineInstr::MIFlag Flag, bool SetNZCV,
6252 bool NeedsWinCFI, bool *HasWinCFI,
6253 bool EmitCFAOffset, StackOffset CFAOffset,
6254 unsigned FrameReg) {
6255 // If a function is marked as arm_locally_streaming, then the runtime value of
6256 // vscale in the prologue/epilogue is different from the runtime value of vscale
6257 // in the function's body. To avoid having to consider multiple vscales,
6258 // we can use `addsvl` to allocate any scalable stack-slots, which under
6259 // most circumstances will be only locals, not callee-save slots.
6260 const Function &F = MBB.getParent()->getFunction();
6261 bool UseSVL = F.hasFnAttribute("aarch64_pstate_sm_body");
6262
6263 int64_t Bytes, NumPredicateVectors, NumDataVectors;
6264 AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
6265 Offset, Bytes, NumPredicateVectors, NumDataVectors);
6266
6267 // Insert ADDSXri for scalable offset at the end.
6268 bool NeedsFinalDefNZCV = SetNZCV && (NumPredicateVectors || NumDataVectors);
6269 if (NeedsFinalDefNZCV)
6270 SetNZCV = false;
6271
6272 // First emit non-scalable frame offsets, or a simple 'mov'.
6273 if (Bytes || (!Offset && SrcReg != DestReg)) {
6274 assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
6275 "SP increment/decrement not 8-byte aligned");
6276 unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
6277 if (Bytes < 0) {
6278 Bytes = -Bytes;
6279 Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
6280 }
6281 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
6282 NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6283 FrameReg);
6284 CFAOffset += (Opc == AArch64::ADDXri || Opc == AArch64::ADDSXri)
6285 ? StackOffset::getFixed(-Bytes)
6286 : StackOffset::getFixed(Bytes);
6287 SrcReg = DestReg;
6288 FrameReg = DestReg;
6289 }
6290
6291 assert(!(NeedsWinCFI && NumPredicateVectors) &&
6292 "WinCFI can't allocate fractions of an SVE data vector");
6293
6294 if (NumDataVectors) {
6295 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
6296 UseSVL ? AArch64::ADDSVL_XXI : AArch64::ADDVL_XXI, TII,
6297 Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6298 FrameReg);
6299 CFAOffset += StackOffset::getScalable(-NumDataVectors * 16);
6300 SrcReg = DestReg;
6301 }
6302
6303 if (NumPredicateVectors) {
6304 assert(DestReg != AArch64::SP && "Unaligned access to SP");
6305 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
6306 UseSVL ? AArch64::ADDSPL_XXI : AArch64::ADDPL_XXI, TII,
6307 Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6308 FrameReg);
6309 }
6310
6311 if (NeedsFinalDefNZCV)
6312 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDSXri), DestReg)
6313 .addReg(DestReg)
6314 .addImm(0)
6315 .addImm(0);
6316}
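
// Usage sketch (illustrative, assuming the usual emitFrameOffset signature
// declared in AArch64InstrInfo.h): allocating 16 fixed bytes plus one SVE
// data vector in a prologue could look like
//   emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
//                   StackOffset::get(-16, -16), TII, MachineInstr::FrameSetup);
// which emits "sub sp, sp, #16" followed by "addvl sp, sp, #-1" (or "addsvl"
// in a locally-streaming function body).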
6317
6320 MachineBasicBlock::iterator InsertPt, int FrameIndex,
6321 LiveIntervals *LIS, VirtRegMap *VRM) const {
6322 // This is a bit of a hack. Consider this instruction:
6323 //
6324 // %0 = COPY %sp; GPR64all:%0
6325 //
6326 // We explicitly chose GPR64all for the virtual register so such a copy might
6327 // be eliminated by RegisterCoalescer. However, that may not be possible, and
6328 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
6329 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
6330 //
6331 // To prevent that, we are going to constrain the %0 register class here.
6332 if (MI.isFullCopy()) {
6333 Register DstReg = MI.getOperand(0).getReg();
6334 Register SrcReg = MI.getOperand(1).getReg();
6335 if (SrcReg == AArch64::SP && DstReg.isVirtual()) {
6336 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
6337 return nullptr;
6338 }
6339 if (DstReg == AArch64::SP && SrcReg.isVirtual()) {
6340 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
6341 return nullptr;
6342 }
6343 // Nothing can be folded with a copy from/to NZCV.
6344 if (SrcReg == AArch64::NZCV || DstReg == AArch64::NZCV)
6345 return nullptr;
6346 }
6347
6348 // Handle the case where a copy is being spilled or filled but the source
6349 // and destination register class don't match. For example:
6350 //
6351 // %0 = COPY %xzr; GPR64common:%0
6352 //
6353 // In this case we can still safely fold away the COPY and generate the
6354 // following spill code:
6355 //
6356 // STRXui %xzr, %stack.0
6357 //
6358 // This also eliminates spilled cross register class COPYs (e.g. between x and
6359 // d regs) of the same size. For example:
6360 //
6361 // %0 = COPY %1; GPR64:%0, FPR64:%1
6362 //
6363 // will be filled as
6364 //
6365 // LDRDui %0, fi<#0>
6366 //
6367 // instead of
6368 //
6369 // LDRXui %Temp, fi<#0>
6370 // %0 = FMOV %Temp
6371 //
6372 if (MI.isCopy() && Ops.size() == 1 &&
6373 // Make sure we're only folding the explicit COPY defs/uses.
6374 (Ops[0] == 0 || Ops[0] == 1)) {
6375 bool IsSpill = Ops[0] == 0;
6376 bool IsFill = !IsSpill;
6378 const MachineRegisterInfo &MRI = MF.getRegInfo();
6379 MachineBasicBlock &MBB = *MI.getParent();
6380 const MachineOperand &DstMO = MI.getOperand(0);
6381 const MachineOperand &SrcMO = MI.getOperand(1);
6382 Register DstReg = DstMO.getReg();
6383 Register SrcReg = SrcMO.getReg();
6384 // This is slightly expensive to compute for physical regs since
6385 // getMinimalPhysRegClass is slow.
6386 auto getRegClass = [&](unsigned Reg) {
6387 return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
6388 : TRI.getMinimalPhysRegClass(Reg);
6389 };
6390
6391 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
6392 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
6393 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
6394 "Mismatched register size in non subreg COPY");
6395 if (IsSpill)
6396 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
6397 getRegClass(SrcReg), &TRI, Register());
6398 else
6399 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
6400 getRegClass(DstReg), &TRI, Register());
6401 return &*--InsertPt;
6402 }
6403
6404 // Handle cases like spilling def of:
6405 //
6406 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
6407 //
6408 // where the physical register source can be widened and stored to the full
6409 // virtual reg destination stack slot, in this case producing:
6410 //
6411 // STRXui %xzr, %stack.0
6412 //
6413 if (IsSpill && DstMO.isUndef() && SrcReg == AArch64::WZR &&
6414 TRI.getRegSizeInBits(*getRegClass(DstReg)) == 64) {
6415 assert(SrcMO.getSubReg() == 0 &&
6416 "Unexpected subreg on physical register");
6417 storeRegToStackSlot(MBB, InsertPt, AArch64::XZR, SrcMO.isKill(),
6418 FrameIndex, &AArch64::GPR64RegClass, &TRI,
6419 Register());
6420 return &*--InsertPt;
6421 }
6422
6423 // Handle cases like filling use of:
6424 //
6425 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
6426 //
6427 // where we can load the full virtual reg source stack slot, into the subreg
6428 // destination, in this case producing:
6429 //
6430 // LDRWui %0:sub_32<def,read-undef>, %stack.0
6431 //
6432 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
6433 const TargetRegisterClass *FillRC = nullptr;
6434 switch (DstMO.getSubReg()) {
6435 default:
6436 break;
6437 case AArch64::sub_32:
6438 if (AArch64::GPR64RegClass.hasSubClassEq(getRegClass(DstReg)))
6439 FillRC = &AArch64::GPR32RegClass;
6440 break;
6441 case AArch64::ssub:
6442 FillRC = &AArch64::FPR32RegClass;
6443 break;
6444 case AArch64::dsub:
6445 FillRC = &AArch64::FPR64RegClass;
6446 break;
6447 }
6448
6449 if (FillRC) {
6450 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
6451 TRI.getRegSizeInBits(*FillRC) &&
6452 "Mismatched regclass size on folded subreg COPY");
6453 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI,
6454 Register());
6455 MachineInstr &LoadMI = *--InsertPt;
6456 MachineOperand &LoadDst = LoadMI.getOperand(0);
6457 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
6458 LoadDst.setSubReg(DstMO.getSubReg());
6459 LoadDst.setIsUndef();
6460 return &LoadMI;
6461 }
6462 }
6463 }
6464
6465 // Cannot fold.
6466 return nullptr;
6467}
6468
6470 StackOffset &SOffset,
6471 bool *OutUseUnscaledOp,
6472 unsigned *OutUnscaledOp,
6473 int64_t *EmittableOffset) {
6474 // Set output values in case of early exit.
6475 if (EmittableOffset)
6476 *EmittableOffset = 0;
6477 if (OutUseUnscaledOp)
6478 *OutUseUnscaledOp = false;
6479 if (OutUnscaledOp)
6480 *OutUnscaledOp = 0;
6481
6482 // Exit early for structured vector spills/fills as they can't take an
6483 // immediate offset.
6484 switch (MI.getOpcode()) {
6485 default:
6486 break;
6487 case AArch64::LD1Rv1d:
6488 case AArch64::LD1Rv2s:
6489 case AArch64::LD1Rv2d:
6490 case AArch64::LD1Rv4h:
6491 case AArch64::LD1Rv4s:
6492 case AArch64::LD1Rv8b:
6493 case AArch64::LD1Rv8h:
6494 case AArch64::LD1Rv16b:
6495 case AArch64::LD1Twov2d:
6496 case AArch64::LD1Threev2d:
6497 case AArch64::LD1Fourv2d:
6498 case AArch64::LD1Twov1d:
6499 case AArch64::LD1Threev1d:
6500 case AArch64::LD1Fourv1d:
6501 case AArch64::ST1Twov2d:
6502 case AArch64::ST1Threev2d:
6503 case AArch64::ST1Fourv2d:
6504 case AArch64::ST1Twov1d:
6505 case AArch64::ST1Threev1d:
6506 case AArch64::ST1Fourv1d:
6507 case AArch64::ST1i8:
6508 case AArch64::ST1i16:
6509 case AArch64::ST1i32:
6510 case AArch64::ST1i64:
6511 case AArch64::IRG:
6512 case AArch64::IRGstack:
6513 case AArch64::STGloop:
6514 case AArch64::STZGloop:
6516 }
6517
6518 // Get the min/max offset and the scale.
6519 TypeSize ScaleValue(0U, false), Width(0U, false);
6520 int64_t MinOff, MaxOff;
6521 if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
6522 MaxOff))
6523 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6524
6525 // Construct the complete offset.
6526 bool IsMulVL = ScaleValue.isScalable();
6527 unsigned Scale = ScaleValue.getKnownMinValue();
6528 int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
6529
6530 const MachineOperand &ImmOpnd =
6531 MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
6532 Offset += ImmOpnd.getImm() * Scale;
6533
6534 // If the offset doesn't match the scale, we rewrite the instruction to
6535 // use the unscaled instruction instead. Likewise, if we have a negative
6536 // offset and there is an unscaled op to use.
6537 std::optional<unsigned> UnscaledOp =
6539 bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
6540 if (useUnscaledOp &&
6541 !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
6542 MaxOff))
6543 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6544
6545 Scale = ScaleValue.getKnownMinValue();
6546 assert(IsMulVL == ScaleValue.isScalable() &&
6547 "Unscaled opcode has different value for scalable");
6548
6549 int64_t Remainder = Offset % Scale;
6550 assert(!(Remainder && useUnscaledOp) &&
6551 "Cannot have remainder when using unscaled op");
6552
6553 assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
6554 int64_t NewOffset = Offset / Scale;
6555 if (MinOff <= NewOffset && NewOffset <= MaxOff)
6556 Offset = Remainder;
6557 else {
6558 NewOffset = NewOffset < 0 ? MinOff : MaxOff;
6559 Offset = Offset - (NewOffset * Scale);
6560 }
6561
6562 if (EmittableOffset)
6563 *EmittableOffset = NewOffset;
6564 if (OutUseUnscaledOp)
6565 *OutUseUnscaledOp = useUnscaledOp;
6566 if (OutUnscaledOp && UnscaledOp)
6567 *OutUnscaledOp = *UnscaledOp;
6568
6569 if (IsMulVL)
6570 SOffset = StackOffset::get(SOffset.getFixed(), Offset);
6571 else
6572 SOffset = StackOffset::get(Offset, SOffset.getScalable());
6574 (SOffset ? 0 : AArch64FrameOffsetIsLegal);
6575}
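
// Worked example (illustrative): for an LDRXui (scale 8, immediate range
// 0..4095) whose current immediate is 0 and whose frame offset resolves to
// 32776 bytes, NewOffset = 4097 exceeds the range, so *EmittableOffset is
// clamped to 4095 and SOffset is left holding the remaining 16 bytes for the
// caller to materialize separately.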
6576
6578 unsigned FrameReg, StackOffset &Offset,
6579 const AArch64InstrInfo *TII) {
6580 unsigned Opcode = MI.getOpcode();
6581 unsigned ImmIdx = FrameRegIdx + 1;
6582
6583 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
6584 Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
6585 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
6586 MI.getOperand(0).getReg(), FrameReg, Offset, TII,
6587 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
6588 MI.eraseFromParent();
6589 Offset = StackOffset();
6590 return true;
6591 }
6592
6593 int64_t NewOffset;
6594 unsigned UnscaledOp;
6595 bool UseUnscaledOp;
6596 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
6597 &UnscaledOp, &NewOffset);
6600 // Replace the FrameIndex with FrameReg.
6601 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
6602 if (UseUnscaledOp)
6603 MI.setDesc(TII->get(UnscaledOp));
6604
6605 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
6606 return !Offset;
6607 }
6608
6609 return false;
6610}
6611
6617
6619 return MCInstBuilder(AArch64::HINT).addImm(0);
6620}
6621
6622// AArch64 supports MachineCombiner.
6623bool AArch64InstrInfo::useMachineCombiner() const { return true; }
6624
6625 // Returns true when Opc sets the NZCV flags.
6626static bool isCombineInstrSettingFlag(unsigned Opc) {
6627 switch (Opc) {
6628 case AArch64::ADDSWrr:
6629 case AArch64::ADDSWri:
6630 case AArch64::ADDSXrr:
6631 case AArch64::ADDSXri:
6632 case AArch64::SUBSWrr:
6633 case AArch64::SUBSXrr:
6634 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6635 case AArch64::SUBSWri:
6636 case AArch64::SUBSXri:
6637 return true;
6638 default:
6639 break;
6640 }
6641 return false;
6642}
6643
6644// 32b Opcodes that can be combined with a MUL
6645static bool isCombineInstrCandidate32(unsigned Opc) {
6646 switch (Opc) {
6647 case AArch64::ADDWrr:
6648 case AArch64::ADDWri:
6649 case AArch64::SUBWrr:
6650 case AArch64::ADDSWrr:
6651 case AArch64::ADDSWri:
6652 case AArch64::SUBSWrr:
6653 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6654 case AArch64::SUBWri:
6655 case AArch64::SUBSWri:
6656 return true;
6657 default:
6658 break;
6659 }
6660 return false;
6661}
6662
6663// 64b Opcodes that can be combined with a MUL
6664static bool isCombineInstrCandidate64(unsigned Opc) {
6665 switch (Opc) {
6666 case AArch64::ADDXrr:
6667 case AArch64::ADDXri:
6668 case AArch64::SUBXrr:
6669 case AArch64::ADDSXrr:
6670 case AArch64::ADDSXri:
6671 case AArch64::SUBSXrr:
6672 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6673 case AArch64::SUBXri:
6674 case AArch64::SUBSXri:
6675 case AArch64::ADDv8i8:
6676 case AArch64::ADDv16i8:
6677 case AArch64::ADDv4i16:
6678 case AArch64::ADDv8i16:
6679 case AArch64::ADDv2i32:
6680 case AArch64::ADDv4i32:
6681 case AArch64::SUBv8i8:
6682 case AArch64::SUBv16i8:
6683 case AArch64::SUBv4i16:
6684 case AArch64::SUBv8i16:
6685 case AArch64::SUBv2i32:
6686 case AArch64::SUBv4i32:
6687 return true;
6688 default:
6689 break;
6690 }
6691 return false;
6692}
6693
6694// FP Opcodes that can be combined with a FMUL.
6695static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
6696 switch (Inst.getOpcode()) {
6697 default:
6698 break;
6699 case AArch64::FADDHrr:
6700 case AArch64::FADDSrr:
6701 case AArch64::FADDDrr:
6702 case AArch64::FADDv4f16:
6703 case AArch64::FADDv8f16:
6704 case AArch64::FADDv2f32:
6705 case AArch64::FADDv2f64:
6706 case AArch64::FADDv4f32:
6707 case AArch64::FSUBHrr:
6708 case AArch64::FSUBSrr:
6709 case AArch64::FSUBDrr:
6710 case AArch64::FSUBv4f16:
6711 case AArch64::FSUBv8f16:
6712 case AArch64::FSUBv2f32:
6713 case AArch64::FSUBv2f64:
6714 case AArch64::FSUBv4f32:
6716 // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
6717 // the target options or if FADD/FSUB has the contract fast-math flag.
6718 return Options.AllowFPOpFusion == FPOpFusion::Fast ||
6720 }
6721 return false;
6722}
6723
6724// Opcodes that can be combined with a MUL
6728
6729//
6730// Utility routine that checks if \param MO is defined by an
6731// \param CombineOpc instruction in the basic block \param MBB
6733 unsigned CombineOpc, unsigned ZeroReg = 0,
6734 bool CheckZeroReg = false) {
6735 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
6736 MachineInstr *MI = nullptr;
6737
6738 if (MO.isReg() && MO.getReg().isVirtual())
6739 MI = MRI.getUniqueVRegDef(MO.getReg());
6740 // And it needs to be in the trace (otherwise, it won't have a depth).
6741 if (!MI || MI->getParent() != &MBB || MI->getOpcode() != CombineOpc)
6742 return false;
6743   // It must only be used by the user we combine with.
6744 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
6745 return false;
6746
6747 if (CheckZeroReg) {
6748 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
6749 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
6750            MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
6751 // The third input reg must be zero.
6752 if (MI->getOperand(3).getReg() != ZeroReg)
6753 return false;
6754 }
6755
6756 if (isCombineInstrSettingFlag(CombineOpc) &&
6757 MI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) == -1)
6758 return false;
6759
6760 return true;
6761}
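
// Illustrative case: with
//   %2:gpr32 = MADDWrrr %0, %1, $wzr     ; the canonical form of MUL
//   %3:gpr32 = ADDWrr %2, %4
// canCombine(MBB, <ADD operand %2>, AArch64::MADDWrrr, AArch64::WZR,
//            /*CheckZeroReg=*/true) returns true, provided %2 has a single
// non-debug use and its defining MADD lives in the same block.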
6762
6763//
6764// Is \param MO defined by an integer multiply and can be combined?
6766 unsigned MulOpc, unsigned ZeroReg) {
6767 return canCombine(MBB, MO, MulOpc, ZeroReg, true);
6768}
6769
6770//
6771// Is \param MO defined by a floating-point multiply and can be combined?
6773 unsigned MulOpc) {
6774 return canCombine(MBB, MO, MulOpc);
6775}
6776
6777// TODO: There are many more machine instruction opcodes to match:
6778// 1. Other data types (integer, vectors)
6779// 2. Other math / logic operations (xor, or)
6780// 3. Other forms of the same operation (intrinsics and other variants)
6781bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
6782 bool Invert) const {
6783 if (Invert)
6784 return false;
6785 switch (Inst.getOpcode()) {
6786 // == Floating-point types ==
6787 // -- Floating-point instructions --
6788 case AArch64::FADDHrr:
6789 case AArch64::FADDSrr:
6790 case AArch64::FADDDrr:
6791 case AArch64::FMULHrr:
6792 case AArch64::FMULSrr:
6793 case AArch64::FMULDrr:
6794 case AArch64::FMULX16:
6795 case AArch64::FMULX32:
6796 case AArch64::FMULX64:
6797 // -- Advanced SIMD instructions --
6798 case AArch64::FADDv4f16:
6799 case AArch64::FADDv8f16:
6800 case AArch64::FADDv2f32:
6801 case AArch64::FADDv4f32:
6802 case AArch64::FADDv2f64:
6803 case AArch64::FMULv4f16:
6804 case AArch64::FMULv8f16:
6805 case AArch64::FMULv2f32:
6806 case AArch64::FMULv4f32:
6807 case AArch64::FMULv2f64:
6808 case AArch64::FMULXv4f16:
6809 case AArch64::FMULXv8f16:
6810 case AArch64::FMULXv2f32:
6811 case AArch64::FMULXv4f32:
6812 case AArch64::FMULXv2f64:
6813 // -- SVE instructions --
6814 // Opcodes FMULX_ZZZ_? don't exist because there is no unpredicated FMULX
6815 // in the SVE instruction set (though there are predicated ones).
6816 case AArch64::FADD_ZZZ_H:
6817 case AArch64::FADD_ZZZ_S:
6818 case AArch64::FADD_ZZZ_D:
6819 case AArch64::FMUL_ZZZ_H:
6820 case AArch64::FMUL_ZZZ_S:
6821 case AArch64::FMUL_ZZZ_D:
6824
6825 // == Integer types ==
6826 // -- Base instructions --
6827 // Opcodes MULWrr and MULXrr don't exist because
6828 // `MUL <Wd>, <Wn>, <Wm>` and `MUL <Xd>, <Xn>, <Xm>` are aliases of
6829 // `MADD <Wd>, <Wn>, <Wm>, WZR` and `MADD <Xd>, <Xn>, <Xm>, XZR` respectively.
6830 // The machine-combiner does not support three-source-operand machine
6831 // instructions, so we cannot reassociate MULs.
6832 case AArch64::ADDWrr:
6833 case AArch64::ADDXrr:
6834 case AArch64::ANDWrr:
6835 case AArch64::ANDXrr:
6836 case AArch64::ORRWrr:
6837 case AArch64::ORRXrr:
6838 case AArch64::EORWrr:
6839 case AArch64::EORXrr:
6840 case AArch64::EONWrr:
6841 case AArch64::EONXrr:
6842 // -- Advanced SIMD instructions --
6843 // Opcodes MULv1i64 and MULv2i64 don't exist because there is no 64-bit MUL
6844 // in the Advanced SIMD instruction set.
6845 case AArch64::ADDv8i8:
6846 case AArch64::ADDv16i8:
6847 case AArch64::ADDv4i16:
6848 case AArch64::ADDv8i16:
6849 case AArch64::ADDv2i32:
6850 case AArch64::ADDv4i32:
6851 case AArch64::ADDv1i64:
6852 case AArch64::ADDv2i64:
6853 case AArch64::MULv8i8:
6854 case AArch64::MULv16i8:
6855 case AArch64::MULv4i16:
6856 case AArch64::MULv8i16:
6857 case AArch64::MULv2i32:
6858 case AArch64::MULv4i32:
6859 case AArch64::ANDv8i8:
6860 case AArch64::ANDv16i8:
6861 case AArch64::ORRv8i8:
6862 case AArch64::ORRv16i8:
6863 case AArch64::EORv8i8:
6864 case AArch64::EORv16i8:
6865 // -- SVE instructions --
6866 case AArch64::ADD_ZZZ_B:
6867 case AArch64::ADD_ZZZ_H:
6868 case AArch64::ADD_ZZZ_S:
6869 case AArch64::ADD_ZZZ_D:
6870 case AArch64::MUL_ZZZ_B:
6871 case AArch64::MUL_ZZZ_H:
6872 case AArch64::MUL_ZZZ_S:
6873 case AArch64::MUL_ZZZ_D:
6874 case AArch64::AND_ZZZ:
6875 case AArch64::ORR_ZZZ:
6876 case AArch64::EOR_ZZZ:
6877 return true;
6878
6879 default:
6880 return false;
6881 }
6882}
6883
6884/// Find instructions that can be turned into madd.
6886 SmallVectorImpl<unsigned> &Patterns) {
6887 unsigned Opc = Root.getOpcode();
6888 MachineBasicBlock &MBB = *Root.getParent();
6889 bool Found = false;
6890
6892 return false;
6894 int Cmp_NZCV =
6895 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
6896     // When NZCV is live, bail out.
6897 if (Cmp_NZCV == -1)
6898 return false;
6899 unsigned NewOpc = convertToNonFlagSettingOpc(Root);
6900     // When the opcode can't be changed, bail out.
6901 // CHECKME: do we miss any cases for opcode conversion?
6902 if (NewOpc == Opc)
6903 return false;
6904 Opc = NewOpc;
6905 }
6906
6907 auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
6908 unsigned Pattern) {
6909 if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
6910 Patterns.push_back(Pattern);
6911 Found = true;
6912 }
6913 };
6914
6915 auto setVFound = [&](int Opcode, int Operand, unsigned Pattern) {
6916 if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
6917 Patterns.push_back(Pattern);
6918 Found = true;
6919 }
6920 };
6921
6923
6924 switch (Opc) {
6925 default:
6926 break;
6927 case AArch64::ADDWrr:
6928 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
6929 "ADDWrr does not have register operands");
6930 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
6931 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
6932 break;
6933 case AArch64::ADDXrr:
6934 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
6935 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
6936 break;
6937 case AArch64::SUBWrr:
6938 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
6939 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
6940 break;
6941 case AArch64::SUBXrr:
6942 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
6943 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
6944 break;
6945 case AArch64::ADDWri:
6946 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
6947 break;
6948 case AArch64::ADDXri:
6949 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
6950 break;
6951 case AArch64::SUBWri:
6952 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
6953 break;
6954 case AArch64::SUBXri:
6955 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
6956 break;
6957 case AArch64::ADDv8i8:
6958 setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
6959 setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
6960 break;
6961 case AArch64::ADDv16i8:
6962 setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
6963 setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
6964 break;
6965 case AArch64::ADDv4i16:
6966 setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
6967 setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
6968 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
6969 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
6970 break;
6971 case AArch64::ADDv8i16:
6972 setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
6973 setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
6974 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
6975 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
6976 break;
6977 case AArch64::ADDv2i32:
6978 setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
6979 setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
6980 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
6981 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
6982 break;
6983 case AArch64::ADDv4i32:
6984 setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
6985 setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
6986 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
6987 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
6988 break;
6989 case AArch64::SUBv8i8:
6990 setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
6991 setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
6992 break;
6993 case AArch64::SUBv16i8:
6994 setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
6995 setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
6996 break;
6997 case AArch64::SUBv4i16:
6998 setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
6999 setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
7000 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
7001 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
7002 break;
7003 case AArch64::SUBv8i16:
7004 setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
7005 setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
7006 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
7007 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
7008 break;
7009 case AArch64::SUBv2i32:
7010 setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
7011 setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
7012 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
7013 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
7014 break;
7015 case AArch64::SUBv4i32:
7016 setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
7017 setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
7018 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
7019 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
7020 break;
7021 }
7022 return Found;
7023}
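
// Illustrative pattern: for
//   %2:gpr32 = MADDWrrr %0, %1, $wzr
//   %3:gpr32 = ADDWrr %4, %2
// the ADD's second operand is defined by a MUL, so MCP::MULADDW_OP2 is
// recorded; the machine combiner can later rewrite the pair into a single
//   %3:gpr32 = MADDWrrr %0, %1, %4
// when its cost model considers that profitable.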
7024
7025bool AArch64InstrInfo::isAccumulationOpcode(unsigned Opcode) const {
7026 switch (Opcode) {
7027 default:
7028 break;
7029 case AArch64::UABALB_ZZZ_D:
7030 case AArch64::UABALB_ZZZ_H:
7031 case AArch64::UABALB_ZZZ_S:
7032 case AArch64::UABALT_ZZZ_D:
7033 case AArch64::UABALT_ZZZ_H:
7034 case AArch64::UABALT_ZZZ_S:
7035 case AArch64::SABALB_ZZZ_D:
7036 case AArch64::SABALB_ZZZ_S:
7037 case AArch64::SABALB_ZZZ_H:
7038 case AArch64::SABALT_ZZZ_D:
7039 case AArch64::SABALT_ZZZ_S:
7040 case AArch64::SABALT_ZZZ_H:
7041 case AArch64::UABALv16i8_v8i16:
7042 case AArch64::UABALv2i32_v2i64:
7043 case AArch64::UABALv4i16_v4i32:
7044 case AArch64::UABALv4i32_v2i64:
7045 case AArch64::UABALv8i16_v4i32:
7046 case AArch64::UABALv8i8_v8i16:
7047 case AArch64::UABAv16i8:
7048 case AArch64::UABAv2i32:
7049 case AArch64::UABAv4i16:
7050 case AArch64::UABAv4i32:
7051 case AArch64::UABAv8i16:
7052 case AArch64::UABAv8i8:
7053 case AArch64::SABALv16i8_v8i16:
7054 case AArch64::SABALv2i32_v2i64:
7055 case AArch64::SABALv4i16_v4i32:
7056 case AArch64::SABALv4i32_v2i64:
7057 case AArch64::SABALv8i16_v4i32:
7058 case AArch64::SABALv8i8_v8i16:
7059 case AArch64::SABAv16i8:
7060 case AArch64::SABAv2i32:
7061 case AArch64::SABAv4i16:
7062 case AArch64::SABAv4i32:
7063 case AArch64::SABAv8i16:
7064 case AArch64::SABAv8i8:
7065 return true;
7066 }
7067
7068 return false;
7069}
7070
7071unsigned AArch64InstrInfo::getAccumulationStartOpcode(
7072 unsigned AccumulationOpcode) const {
7073 switch (AccumulationOpcode) {
7074 default:
7075 llvm_unreachable("Unsupported accumulation Opcode!");
7076 case AArch64::UABALB_ZZZ_D:
7077 return AArch64::UABDLB_ZZZ_D;
7078 case AArch64::UABALB_ZZZ_H:
7079 return AArch64::UABDLB_ZZZ_H;
7080 case AArch64::UABALB_ZZZ_S:
7081 return AArch64::UABDLB_ZZZ_S;
7082 case AArch64::UABALT_ZZZ_D:
7083 return AArch64::UABDLT_ZZZ_D;
7084 case AArch64::UABALT_ZZZ_H:
7085 return AArch64::UABDLT_ZZZ_H;
7086 case AArch64::UABALT_ZZZ_S:
7087 return AArch64::UABDLT_ZZZ_S;
7088 case AArch64::UABALv16i8_v8i16:
7089 return AArch64::UABDLv16i8_v8i16;
7090 case AArch64::UABALv2i32_v2i64:
7091 return AArch64::UABDLv2i32_v2i64;
7092 case AArch64::UABALv4i16_v4i32:
7093 return AArch64::UABDLv4i16_v4i32;
7094 case AArch64::UABALv4i32_v2i64:
7095 return AArch64::UABDLv4i32_v2i64;
7096 case AArch64::UABALv8i16_v4i32:
7097 return AArch64::UABDLv8i16_v4i32;
7098 case AArch64::UABALv8i8_v8i16:
7099 return AArch64::UABDLv8i8_v8i16;
7100 case AArch64::UABAv16i8:
7101 return AArch64::UABDv16i8;
7102 case AArch64::UABAv2i32:
7103 return AArch64::UABDv2i32;
7104 case AArch64::UABAv4i16:
7105 return AArch64::UABDv4i16;
7106 case AArch64::UABAv4i32:
7107 return AArch64::UABDv4i32;
7108 case AArch64::UABAv8i16:
7109 return AArch64::UABDv8i16;
7110 case AArch64::UABAv8i8:
7111 return AArch64::UABDv8i8;
7112 case AArch64::SABALB_ZZZ_D:
7113 return AArch64::SABDLB_ZZZ_D;
7114 case AArch64::SABALB_ZZZ_S:
7115 return AArch64::SABDLB_ZZZ_S;
7116 case AArch64::SABALB_ZZZ_H:
7117 return AArch64::SABDLB_ZZZ_H;
7118 case AArch64::SABALT_ZZZ_D:
7119 return AArch64::SABDLT_ZZZ_D;
7120 case AArch64::SABALT_ZZZ_S:
7121 return AArch64::SABDLT_ZZZ_S;
7122 case AArch64::SABALT_ZZZ_H:
7123 return AArch64::SABDLT_ZZZ_H;
7124 case AArch64::SABALv16i8_v8i16:
7125 return AArch64::SABDLv16i8_v8i16;
7126 case AArch64::SABALv2i32_v2i64:
7127 return AArch64::SABDLv2i32_v2i64;
7128 case AArch64::SABALv4i16_v4i32:
7129 return AArch64::SABDLv4i16_v4i32;
7130 case AArch64::SABALv4i32_v2i64:
7131 return AArch64::SABDLv4i32_v2i64;
7132 case AArch64::SABALv8i16_v4i32:
7133 return AArch64::SABDLv8i16_v4i32;
7134 case AArch64::SABALv8i8_v8i16:
7135 return AArch64::SABDLv8i8_v8i16;
7136 case AArch64::SABAv16i8:
7137 return AArch64::SABDv16i8;
7138 case AArch64::SABAv2i32:
7139 return AArch64::SABDv2i32;
7140 case AArch64::SABAv4i16:
7141 return AArch64::SABDv4i16;
7142 case AArch64::SABAv4i32:
7143 return AArch64::SABDv4i32;
7144 case AArch64::SABAv8i16:
7145 return AArch64::SABDv8i16;
7146 case AArch64::SABAv8i8:
7147 return AArch64::SABDv8i8;
7148 }
7149}
7150
7151/// Floating-Point Support
7152
7153/// Find instructions that can be turned into madd.
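/// A minimal sketch of the scalar single-precision case matched here
/// (placeholder registers):
///   %p = FMULSrr %a, %b
///   %r = FADDSrr %p, %c            ; matches FMULADDS_OP1
///   ==> %r = FMADDSrrr %a, %b, %c  ; a*b + c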
7155 SmallVectorImpl<unsigned> &Patterns) {
7156
7157 if (!isCombineInstrCandidateFP(Root))
7158 return false;
7159
7160 MachineBasicBlock &MBB = *Root.getParent();
7161 bool Found = false;
7162
7163 auto Match = [&](int Opcode, int Operand, unsigned Pattern) -> bool {
7164 if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
7165 Patterns.push_back(Pattern);
7166 return true;
7167 }
7168 return false;
7169 };
7170
7172
7173 switch (Root.getOpcode()) {
7174 default:
7175 assert(false && "Unsupported FP instruction in combiner\n");
7176 break;
7177 case AArch64::FADDHrr:
7178 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7179 "FADDHrr does not have register operands");
7180
7181 Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
7182 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
7183 break;
7184 case AArch64::FADDSrr:
7185 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7186 "FADDSrr does not have register operands");
7187
7188 Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
7189 Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
7190
7191 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
7192 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
7193 break;
7194 case AArch64::FADDDrr:
7195 Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
7196 Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
7197
7198 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
7199 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
7200 break;
7201 case AArch64::FADDv4f16:
7202 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
7203 Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
7204
7205 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
7206 Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
7207 break;
7208 case AArch64::FADDv8f16:
7209 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
7210 Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
7211
7212 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
7213 Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
7214 break;
7215 case AArch64::FADDv2f32:
7216 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
7217 Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
7218
7219 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
7220 Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
7221 break;
7222 case AArch64::FADDv2f64:
7223 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
7224 Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
7225
7226 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
7227 Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
7228 break;
7229 case AArch64::FADDv4f32:
7230 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
7231 Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
7232
7233 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
7234 Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
7235 break;
7236 case AArch64::FSUBHrr:
7237 Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
7238 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
7239 Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
7240 break;
7241 case AArch64::FSUBSrr:
7242 Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
7243
7244 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
7245 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
7246
7247 Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
7248 break;
7249 case AArch64::FSUBDrr:
7250 Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
7251
7252 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
7253 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
7254
7255 Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
7256 break;
7257 case AArch64::FSUBv4f16:
7258 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
7259 Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
7260
7261 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
7262 Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
7263 break;
7264 case AArch64::FSUBv8f16:
7265 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
7266 Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
7267
7268 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
7269 Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
7270 break;
7271 case AArch64::FSUBv2f32:
7272 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
7273 Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
7274
7275 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
7276 Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
7277 break;
7278 case AArch64::FSUBv2f64:
7279 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
7280 Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
7281
7282 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
7283 Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
7284 break;
7285 case AArch64::FSUBv4f32:
7286 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
7287 Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
7288
7289 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
7290 Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
7291 break;
7292 }
7293 return Found;
7294}
7295
7297 SmallVectorImpl<unsigned> &Patterns) {
7298 MachineBasicBlock &MBB = *Root.getParent();
7299 bool Found = false;
7300
7301 auto Match = [&](unsigned Opcode, int Operand, unsigned Pattern) -> bool {
7302 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7303 MachineOperand &MO = Root.getOperand(Operand);
7304 MachineInstr *MI = nullptr;
7305 if (MO.isReg() && MO.getReg().isVirtual())
7306 MI = MRI.getUniqueVRegDef(MO.getReg());
7307 // Ignore No-op COPYs in FMUL(COPY(DUP(..)))
7308 if (MI && MI->getOpcode() == TargetOpcode::COPY &&
7309 MI->getOperand(1).getReg().isVirtual())
7310 MI = MRI.getUniqueVRegDef(MI->getOperand(1).getReg());
7311 if (MI && MI->getOpcode() == Opcode) {
7312 Patterns.push_back(Pattern);
7313 return true;
7314 }
7315 return false;
7316 };
7317
7319
7320 switch (Root.getOpcode()) {
7321 default:
7322 return false;
7323 case AArch64::FMULv2f32:
7324 Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
7325 Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
7326 break;
7327 case AArch64::FMULv2f64:
7328 Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
7329 Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
7330 break;
7331 case AArch64::FMULv4f16:
7332 Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
7333 Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
7334 break;
7335 case AArch64::FMULv4f32:
7336 Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
7337 Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
7338 break;
7339 case AArch64::FMULv8f16:
7340 Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
7341 Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
7342 break;
7343 }
7344
7345 return Found;
7346}
7347
7349 SmallVectorImpl<unsigned> &Patterns) {
7350 unsigned Opc = Root.getOpcode();
7351 MachineBasicBlock &MBB = *Root.getParent();
7352 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7353
7354 auto Match = [&](unsigned Opcode, unsigned Pattern) -> bool {
7355 MachineOperand &MO = Root.getOperand(1);
7356 MachineInstr *MI = MRI.getUniqueVRegDef(MO.getReg());
7357 if (MI != nullptr && (MI->getOpcode() == Opcode) &&
7358 MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()) &&
7362 MI->getFlag(MachineInstr::MIFlag::FmNsz)) {
7363 Patterns.push_back(Pattern);
7364 return true;
7365 }
7366 return false;
7367 };
7368
7369 switch (Opc) {
7370 default:
7371 break;
7372 case AArch64::FNEGDr:
7373 return Match(AArch64::FMADDDrrr, AArch64MachineCombinerPattern::FNMADD);
7374 case AArch64::FNEGSr:
7375 return Match(AArch64::FMADDSrrr, AArch64MachineCombinerPattern::FNMADD);
7376 }
7377
7378 return false;
7379}
7380
7381/// Return true when a code sequence can improve throughput. It
7382/// should be called only for instructions in loops.
7383/// \param Pattern - combiner pattern
7385 switch (Pattern) {
7386 default:
7387 break;
7493 return true;
7494 } // end switch (Pattern)
7495 return false;
7496}
7497
7498/// Find other MI combine patterns.
7500 SmallVectorImpl<unsigned> &Patterns) {
7501 // A - (B + C) ==> (A - B) - C or (A - C) - B
7502 unsigned Opc = Root.getOpcode();
7503 MachineBasicBlock &MBB = *Root.getParent();
7504
7505 switch (Opc) {
7506 case AArch64::SUBWrr:
7507 case AArch64::SUBSWrr:
7508 case AArch64::SUBXrr:
7509 case AArch64::SUBSXrr:
7510 // Found candidate root.
7511 break;
7512 default:
7513 return false;
7514 }
7515
7517 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) ==
7518 -1)
7519 return false;
7520
7521 if (canCombine(MBB, Root.getOperand(2), AArch64::ADDWrr) ||
7522 canCombine(MBB, Root.getOperand(2), AArch64::ADDSWrr) ||
7523 canCombine(MBB, Root.getOperand(2), AArch64::ADDXrr) ||
7524 canCombine(MBB, Root.getOperand(2), AArch64::ADDSXrr)) {
7527 return true;
7528 }
7529
7530 return false;
7531}
7532
7533/// Check if the given instruction forms a gather load pattern that can be
7534/// optimized for better Memory-Level Parallelism (MLP). This function
7535/// identifies chains of NEON lane load instructions that load data from
7536/// different memory addresses into individual lanes of a 128-bit vector
7537/// register, then attempts to split the pattern into parallel loads to break
7538/// the serial dependency between instructions.
7539///
7540/// Pattern Matched:
7541/// Initial scalar load -> SUBREG_TO_REG (lane 0) -> LD1i* (lane 1) ->
7542/// LD1i* (lane 2) -> ... -> LD1i* (lane N-1, Root)
7543///
7544/// Transformed Into:
7545/// Two parallel vector loads using fewer lanes each, followed by ZIP1v2i64
7546/// to combine the results, enabling better memory-level parallelism.
7547///
7548/// Supported Element Types:
7549/// - 32-bit elements (LD1i32, 4 lanes total)
7550/// - 16-bit elements (LD1i16, 8 lanes total)
7551/// - 8-bit elements (LD1i8, 16 lanes total)
///
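/// Rough sketch of a matched chain for the 32-bit case (register names and
/// pointer operands are placeholders):
///   %s  = <scalar 32-bit load> %p0          ; e.g. an LDRSui
///   %v0 = SUBREG_TO_REG 0, %s, ssub         ; lane 0
///   %v1 = LD1i32 %v0, 1, %p1                ; lane 1
///   %v2 = LD1i32 %v1, 2, %p2                ; lane 2
///   %r  = LD1i32 %v2, 3, %p3                ; lane 3, the Root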
7553 SmallVectorImpl<unsigned> &Patterns,
7554 unsigned LoadLaneOpCode, unsigned NumLanes) {
7555 const MachineFunction *MF = Root.getMF();
7556
7557 // Early exit if optimizing for size.
7558 if (MF->getFunction().hasMinSize())
7559 return false;
7560
7561 const MachineRegisterInfo &MRI = MF->getRegInfo();
7563
7564 // The root of the pattern must load into the last lane of the vector.
7565 if (Root.getOperand(2).getImm() != NumLanes - 1)
7566 return false;
7567
7568 // Check that we have loads into all lanes except lane 0.
7569 // For each load we also want to check that:
7570 // 1. It has a single non-debug use (since we will be replacing the virtual
7571 // register)
7572 // 2. That the addressing mode only uses a single pointer operand
7573 auto *CurrInstr = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
7574 auto Range = llvm::seq<unsigned>(1, NumLanes - 1);
7575 SmallSet<unsigned, 16> RemainingLanes(Range.begin(), Range.end());
7577 while (!RemainingLanes.empty() && CurrInstr &&
7578 CurrInstr->getOpcode() == LoadLaneOpCode &&
7579 MRI.hasOneNonDBGUse(CurrInstr->getOperand(0).getReg()) &&
7580 CurrInstr->getNumOperands() == 4) {
7581 RemainingLanes.erase(CurrInstr->getOperand(2).getImm());
7582 LoadInstrs.push_back(CurrInstr);
7583 CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
7584 }
7585
7586 // Check that we have found a match for lanes N-1..1.
7587 if (!RemainingLanes.empty())
7588 return false;
7589
7590 // Match the SUBREG_TO_REG sequence.
7591 if (CurrInstr->getOpcode() != TargetOpcode::SUBREG_TO_REG)
7592 return false;
7593
7594 // Verify that the subreg to reg loads an integer into the first lane.
7595 auto Lane0LoadReg = CurrInstr->getOperand(2).getReg();
7596 unsigned SingleLaneSizeInBits = 128 / NumLanes;
7597 if (TRI->getRegSizeInBits(Lane0LoadReg, MRI) != SingleLaneSizeInBits)
7598 return false;
7599
7600 // Verify that it also has a single non debug use.
7601 if (!MRI.hasOneNonDBGUse(Lane0LoadReg))
7602 return false;
7603
7604 LoadInstrs.push_back(MRI.getUniqueVRegDef(Lane0LoadReg));
7605
7606 // If there is any chance of aliasing, do not apply the pattern.
7607 // Walk backward through the MBB starting from Root.
7608 // Exit early if we've encountered all load instructions or hit the search
7609 // limit.
7610 auto MBBItr = Root.getIterator();
7611 unsigned RemainingSteps = GatherOptSearchLimit;
7612 SmallPtrSet<const MachineInstr *, 16> RemainingLoadInstrs;
7613 RemainingLoadInstrs.insert(LoadInstrs.begin(), LoadInstrs.end());
7614 const MachineBasicBlock *MBB = Root.getParent();
7615
7616 for (; MBBItr != MBB->begin() && RemainingSteps > 0 &&
7617 !RemainingLoadInstrs.empty();
7618 --MBBItr, --RemainingSteps) {
7619 const MachineInstr &CurrInstr = *MBBItr;
7620
7621 // Remove this instruction from remaining loads if it's one we're tracking.
7622 RemainingLoadInstrs.erase(&CurrInstr);
7623
7624 // Check for potential aliasing with any of the load instructions to
7625 // optimize.
7626 if (CurrInstr.isLoadFoldBarrier())
7627 return false;
7628 }
7629
7630 // If we hit the search limit without finding all load instructions,
7631 // don't match the pattern.
7632 if (RemainingSteps == 0 && !RemainingLoadInstrs.empty())
7633 return false;
7634
7635 switch (NumLanes) {
7636 case 4:
7638 break;
7639 case 8:
7641 break;
7642 case 16:
7644 break;
7645 default:
7646 llvm_unreachable("Got bad number of lanes for gather pattern.");
7647 }
7648
7649 return true;
7650}
7651
7652/// Search for patterns of LD instructions we can optimize.
7654 SmallVectorImpl<unsigned> &Patterns) {
7655
7656 // The pattern searches for loads into single lanes.
7657 switch (Root.getOpcode()) {
7658 case AArch64::LD1i32:
7659 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 4);
7660 case AArch64::LD1i16:
7661 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 8);
7662 case AArch64::LD1i8:
7663 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 16);
7664 default:
7665 return false;
7666 }
7667}
7668
7669/// Generate optimized instruction sequence for gather load patterns to improve
7670/// Memory-Level Parallelism (MLP). This function transforms a chain of
7671/// sequential NEON lane loads into parallel vector loads that can execute
7672/// concurrently.
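/// Rough sketch of the emitted sequence for the 4 x 32-bit case (placeholder
/// registers; compare with the matched chain shown for getGatherLanePattern):
///   %a1 = LD1i32 %lane0vec, 1, %p1          ; register 0: original lanes 0-1
///   %b  = LDRSui %p2, 0                     ; original lane 2 as a scalar
///   %b0 = SUBREG_TO_REG 0, %b, ssub
///   %b1 = LD1i32 %b0, 1, %p3                ; register 1: original lanes 2-3
///   %r  = ZIP1v2i64 %a1, %b1                ; combine the two halves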
7673static void
7677 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
7678 unsigned Pattern, unsigned NumLanes) {
7679 MachineFunction &MF = *Root.getParent()->getParent();
7682
7683 // Gather the initial load instructions to build the pattern.
7684 SmallVector<MachineInstr *, 16> LoadToLaneInstrs;
7685 MachineInstr *CurrInstr = &Root;
7686 for (unsigned i = 0; i < NumLanes - 1; ++i) {
7687 LoadToLaneInstrs.push_back(CurrInstr);
7688 CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
7689 }
7690
7691 // Sort the load instructions according to the lane.
7692 llvm::sort(LoadToLaneInstrs,
7693 [](const MachineInstr *A, const MachineInstr *B) {
7694 return A->getOperand(2).getImm() > B->getOperand(2).getImm();
7695 });
7696
7697 MachineInstr *SubregToReg = CurrInstr;
7698 LoadToLaneInstrs.push_back(
7699 MRI.getUniqueVRegDef(SubregToReg->getOperand(2).getReg()));
7700 auto LoadToLaneInstrsAscending = llvm::reverse(LoadToLaneInstrs);
7701
7702 const TargetRegisterClass *FPR128RegClass =
7703 MRI.getRegClass(Root.getOperand(0).getReg());
7704
7705 // Helper lambda to create a LD1 instruction.
7706 auto CreateLD1Instruction = [&](MachineInstr *OriginalInstr,
7707 Register SrcRegister, unsigned Lane,
7708 Register OffsetRegister,
7709 bool OffsetRegisterKillState) {
7710 auto NewRegister = MRI.createVirtualRegister(FPR128RegClass);
7711 MachineInstrBuilder LoadIndexIntoRegister =
7712 BuildMI(MF, MIMetadata(*OriginalInstr), TII->get(Root.getOpcode()),
7713 NewRegister)
7714 .addReg(SrcRegister)
7715 .addImm(Lane)
7716 .addReg(OffsetRegister, getKillRegState(OffsetRegisterKillState));
7717 InstrIdxForVirtReg.insert(std::make_pair(NewRegister, InsInstrs.size()));
7718 InsInstrs.push_back(LoadIndexIntoRegister);
7719 return NewRegister;
7720 };
7721
7722 // Helper to create load instruction based on the NumLanes in the NEON
7723 // register we are rewriting.
7724 auto CreateLDRInstruction = [&](unsigned NumLanes, Register DestReg,
7725 Register OffsetReg,
7726 bool KillState) -> MachineInstrBuilder {
7727 unsigned Opcode;
7728 switch (NumLanes) {
7729 case 4:
7730 Opcode = AArch64::LDRSui;
7731 break;
7732 case 8:
7733 Opcode = AArch64::LDRHui;
7734 break;
7735 case 16:
7736 Opcode = AArch64::LDRBui;
7737 break;
7738 default:
7740 "Got unsupported number of lanes in machine-combiner gather pattern");
7741 }
7742 // Immediate offset load
7743 return BuildMI(MF, MIMetadata(Root), TII->get(Opcode), DestReg)
7744 .addReg(OffsetReg)
7745 .addImm(0);
7746 };
7747
7748 // Load the remaining lanes into register 0.
7749 auto LanesToLoadToReg0 =
7750 llvm::make_range(LoadToLaneInstrsAscending.begin() + 1,
7751 LoadToLaneInstrsAscending.begin() + NumLanes / 2);
7752 Register PrevReg = SubregToReg->getOperand(0).getReg();
7753 for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg0)) {
7754 const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
7755 PrevReg = CreateLD1Instruction(LoadInstr, PrevReg, Index + 1,
7756 OffsetRegOperand.getReg(),
7757 OffsetRegOperand.isKill());
7758 DelInstrs.push_back(LoadInstr);
7759 }
7760 Register LastLoadReg0 = PrevReg;
7761
7762 // First load into register 1. Perform an integer load to zero out the upper
7763 // lanes in a single instruction.
7764 MachineInstr *Lane0Load = *LoadToLaneInstrsAscending.begin();
7765 MachineInstr *OriginalSplitLoad =
7766 *std::next(LoadToLaneInstrsAscending.begin(), NumLanes / 2);
7767 Register DestRegForMiddleIndex = MRI.createVirtualRegister(
7768 MRI.getRegClass(Lane0Load->getOperand(0).getReg()));
7769
7770 const MachineOperand &OriginalSplitToLoadOffsetOperand =
7771 OriginalSplitLoad->getOperand(3);
7772 MachineInstrBuilder MiddleIndexLoadInstr =
7773 CreateLDRInstruction(NumLanes, DestRegForMiddleIndex,
7774 OriginalSplitToLoadOffsetOperand.getReg(),
7775 OriginalSplitToLoadOffsetOperand.isKill());
7776
7777 InstrIdxForVirtReg.insert(
7778 std::make_pair(DestRegForMiddleIndex, InsInstrs.size()));
7779 InsInstrs.push_back(MiddleIndexLoadInstr);
7780 DelInstrs.push_back(OriginalSplitLoad);
7781
7782 // Subreg To Reg instruction for register 1.
7783 Register DestRegForSubregToReg = MRI.createVirtualRegister(FPR128RegClass);
7784 unsigned SubregType;
7785 switch (NumLanes) {
7786 case 4:
7787 SubregType = AArch64::ssub;
7788 break;
7789 case 8:
7790 SubregType = AArch64::hsub;
7791 break;
7792 case 16:
7793 SubregType = AArch64::bsub;
7794 break;
7795 default:
7797 "Got invalid NumLanes for machine-combiner gather pattern");
7798 }
7799
7800 auto SubRegToRegInstr =
7801 BuildMI(MF, MIMetadata(Root), TII->get(SubregToReg->getOpcode()),
7802 DestRegForSubregToReg)
7803 .addImm(0)
7804 .addReg(DestRegForMiddleIndex, getKillRegState(true))
7805 .addImm(SubregType);
7806 InstrIdxForVirtReg.insert(
7807 std::make_pair(DestRegForSubregToReg, InsInstrs.size()));
7808 InsInstrs.push_back(SubRegToRegInstr);
7809
7810 // Load remaining lanes into register 1.
7811 auto LanesToLoadToReg1 =
7812 llvm::make_range(LoadToLaneInstrsAscending.begin() + NumLanes / 2 + 1,
7813 LoadToLaneInstrsAscending.end());
7814 PrevReg = SubRegToRegInstr->getOperand(0).getReg();
7815 for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg1)) {
7816 const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
7817 PrevReg = CreateLD1Instruction(LoadInstr, PrevReg, Index + 1,
7818 OffsetRegOperand.getReg(),
7819 OffsetRegOperand.isKill());
7820
7821 // Do not add the last load to DelInstrs - it will be removed later.
7822 if (Index == NumLanes / 2 - 2) {
7823 break;
7824 }
7825 DelInstrs.push_back(LoadInstr);
7826 }
7827 Register LastLoadReg1 = PrevReg;
7828
7829 // Create the final zip instruction to combine the results.
7830 MachineInstrBuilder ZipInstr =
7831 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::ZIP1v2i64),
7832 Root.getOperand(0).getReg())
7833 .addReg(LastLoadReg0)
7834 .addReg(LastLoadReg1);
7835 InsInstrs.push_back(ZipInstr);
7836}
7837
7851
7852/// Return true when there is potentially a faster code sequence for an
7853/// instruction chain ending in \p Root. All potential patterns are listed in
7854/// the \p Pattern vector. Pattern should be sorted in priority order since the
7855/// pattern evaluator stops checking as soon as it finds a faster sequence.
7856
7857bool AArch64InstrInfo::getMachineCombinerPatterns(
7858 MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
7859 bool DoRegPressureReduce) const {
7860 // Integer patterns
7861 if (getMaddPatterns(Root, Patterns))
7862 return true;
7863 // Floating point patterns
7864 if (getFMULPatterns(Root, Patterns))
7865 return true;
7866 if (getFMAPatterns(Root, Patterns))
7867 return true;
7868 if (getFNEGPatterns(Root, Patterns))
7869 return true;
7870
7871 // Other patterns
7872 if (getMiscPatterns(Root, Patterns))
7873 return true;
7874
7875 // Load patterns
7876 if (getLoadPatterns(Root, Patterns))
7877 return true;
7878
7879 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
7880 DoRegPressureReduce);
7881}
7882
7884/// genFusedMultiply - Generate fused multiply instructions.
7885/// This function supports both integer and floating point instructions.
7886/// A typical example:
7887/// F|MUL I=A,B,0
7888/// F|ADD R,I,C
7889/// ==> F|MADD R,A,B,C
7890/// \param MF Containing MachineFunction
7891/// \param MRI Register information
7892/// \param TII Target information
7893/// \param Root is the F|ADD instruction
7894/// \param [out] InsInstrs is a vector of machine instructions and will
7895/// contain the generated madd instruction
7896/// \param IdxMulOpd is index of operand in Root that is the result of
7897/// the F|MUL. In the example above IdxMulOpd is 1.
7898/// \param MaddOpc the opcode of the f|madd instruction
7899/// \param RC Register class of operands
7900/// \param kind Kind of FMA instruction (addressing mode) to be generated
7901/// \param ReplacedAddend is the result register from the instruction
7902/// replacing the non-combined operand, if any.
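/// For illustration, the operand order emitted for each kind (placeholder
/// registers, with Root computing R = A*B op C):
///   Default:      MADDXrrr R, A, B, C
///   Indexed:      FMLAv4i32_indexed R, C, A, B, lane
///   Accumulator:  FMLAv4f32 R, C, A, B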
7903static MachineInstr *
7905 const TargetInstrInfo *TII, MachineInstr &Root,
7906 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
7907 unsigned MaddOpc, const TargetRegisterClass *RC,
7909 const Register *ReplacedAddend = nullptr) {
7910 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
7911
7912 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
7913 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
7914 Register ResultReg = Root.getOperand(0).getReg();
7915 Register SrcReg0 = MUL->getOperand(1).getReg();
7916 bool Src0IsKill = MUL->getOperand(1).isKill();
7917 Register SrcReg1 = MUL->getOperand(2).getReg();
7918 bool Src1IsKill = MUL->getOperand(2).isKill();
7919
7920 Register SrcReg2;
7921 bool Src2IsKill;
7922 if (ReplacedAddend) {
7923 // If we just generated a new addend, we must be its only use.
7924 SrcReg2 = *ReplacedAddend;
7925 Src2IsKill = true;
7926 } else {
7927 SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
7928 Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
7929 }
7930
7931 if (ResultReg.isVirtual())
7932 MRI.constrainRegClass(ResultReg, RC);
7933 if (SrcReg0.isVirtual())
7934 MRI.constrainRegClass(SrcReg0, RC);
7935 if (SrcReg1.isVirtual())
7936 MRI.constrainRegClass(SrcReg1, RC);
7937 if (SrcReg2.isVirtual())
7938 MRI.constrainRegClass(SrcReg2, RC);
7939
7941 if (kind == FMAInstKind::Default)
7942 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
7943 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7944 .addReg(SrcReg1, getKillRegState(Src1IsKill))
7945 .addReg(SrcReg2, getKillRegState(Src2IsKill));
7946 else if (kind == FMAInstKind::Indexed)
7947 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
7948 .addReg(SrcReg2, getKillRegState(Src2IsKill))
7949 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7950 .addReg(SrcReg1, getKillRegState(Src1IsKill))
7951 .addImm(MUL->getOperand(3).getImm());
7952 else if (kind == FMAInstKind::Accumulator)
7953 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
7954 .addReg(SrcReg2, getKillRegState(Src2IsKill))
7955 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7956 .addReg(SrcReg1, getKillRegState(Src1IsKill));
7957 else
7958 assert(false && "Invalid FMA instruction kind \n");
7959 // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
7960 InsInstrs.push_back(MIB);
7961 return MUL;
7962}
7963
7964static MachineInstr *
7966 const TargetInstrInfo *TII, MachineInstr &Root,
7968 MachineInstr *MAD = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
7969
7970 unsigned Opc = 0;
7971 const TargetRegisterClass *RC = MRI.getRegClass(MAD->getOperand(0).getReg());
7972 if (AArch64::FPR32RegClass.hasSubClassEq(RC))
7973 Opc = AArch64::FNMADDSrrr;
7974 else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
7975 Opc = AArch64::FNMADDDrrr;
7976 else
7977 return nullptr;
7978
7979 Register ResultReg = Root.getOperand(0).getReg();
7980 Register SrcReg0 = MAD->getOperand(1).getReg();
7981 Register SrcReg1 = MAD->getOperand(2).getReg();
7982 Register SrcReg2 = MAD->getOperand(3).getReg();
7983 bool Src0IsKill = MAD->getOperand(1).isKill();
7984 bool Src1IsKill = MAD->getOperand(2).isKill();
7985 bool Src2IsKill = MAD->getOperand(3).isKill();
7986 if (ResultReg.isVirtual())
7987 MRI.constrainRegClass(ResultReg, RC);
7988 if (SrcReg0.isVirtual())
7989 MRI.constrainRegClass(SrcReg0, RC);
7990 if (SrcReg1.isVirtual())
7991 MRI.constrainRegClass(SrcReg1, RC);
7992 if (SrcReg2.isVirtual())
7993 MRI.constrainRegClass(SrcReg2, RC);
7994
7996 BuildMI(MF, MIMetadata(Root), TII->get(Opc), ResultReg)
7997 .addReg(SrcReg0, getKillRegState(Src0IsKill))
7998 .addReg(SrcReg1, getKillRegState(Src1IsKill))
7999 .addReg(SrcReg2, getKillRegState(Src2IsKill));
8000 InsInstrs.push_back(MIB);
8001
8002 return MAD;
8003}
8004
8005/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
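/// Illustrative sketch with placeholder registers and lane index:
///   %d = DUPv2i32lane %q, 1
///   %r = FMULv2f32 %x, %d
///   ==> %r = FMULv2i32_indexed %x, %q, 1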
8006static MachineInstr *
8009 unsigned IdxDupOp, unsigned MulOpc,
8011 assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
8012 "Invalid index of FMUL operand");
8013
8014 MachineFunction &MF = *Root.getMF();
8016
8017 MachineInstr *Dup =
8018 MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
8019
8020 if (Dup->getOpcode() == TargetOpcode::COPY)
8021 Dup = MRI.getUniqueVRegDef(Dup->getOperand(1).getReg());
8022
8023 Register DupSrcReg = Dup->getOperand(1).getReg();
8024 MRI.clearKillFlags(DupSrcReg);
8025 MRI.constrainRegClass(DupSrcReg, RC);
8026
8027 unsigned DupSrcLane = Dup->getOperand(2).getImm();
8028
8029 unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
8030 MachineOperand &MulOp = Root.getOperand(IdxMulOp);
8031
8032 Register ResultReg = Root.getOperand(0).getReg();
8033
8035 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MulOpc), ResultReg)
8036 .add(MulOp)
8037 .addReg(DupSrcReg)
8038 .addImm(DupSrcLane);
8039
8040 InsInstrs.push_back(MIB);
8041 return &Root;
8042}
8043
8044/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
8045/// instructions.
8046///
8047/// \see genFusedMultiply
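///
/// For example, for a vector multiply-add with the multiply feeding operand 1
/// (placeholder registers):
///   %m = MULv2i32 %a, %b
///   %r = ADDv2i32 %m, %c
///   ==> %r = MLAv2i32 %c, %a, %b    ; c + a*b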
8051 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
8052 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8054}
8055
8056/// genNeg - Helper to generate an intermediate negation of the second operand
8057/// of Root
8059 const TargetInstrInfo *TII, MachineInstr &Root,
8061 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
8062 unsigned MnegOpc, const TargetRegisterClass *RC) {
8063 Register NewVR = MRI.createVirtualRegister(RC);
8065 BuildMI(MF, MIMetadata(Root), TII->get(MnegOpc), NewVR)
8066 .add(Root.getOperand(2));
8067 InsInstrs.push_back(MIB);
8068
8069 assert(InstrIdxForVirtReg.empty());
8070 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8071
8072 return NewVR;
8073}
8074
8075/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
8076/// instructions with an additional negation of the accumulator
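///
/// For example, for a vector multiply-subtract with the multiply feeding
/// operand 1 (placeholder registers):
///   %m = MULv2i32 %a, %b
///   %r = SUBv2i32 %m, %c
///   ==> %n = NEGv2i32 %c
///       %r = MLAv2i32 %n, %a, %b    ; (-c) + a*b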
8080 DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
8081 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
8082 assert(IdxMulOpd == 1);
8083
8084 Register NewVR =
8085 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
8086 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8087 FMAInstKind::Accumulator, &NewVR);
8088}
8089
8090/// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
8091/// instructions.
8092///
8093/// \see genFusedMultiply
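///
/// For example, keeping the lane index of the original indexed multiply
/// (placeholder registers):
///   %m = MULv4i16_indexed %a, %q, 2
///   %r = ADDv4i16 %m, %c
///   ==> %r = MLAv4i16_indexed %c, %a, %q, 2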
8097 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
8098 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8100}
8101
8102/// genFusedMultiplyIdxNeg - Helper to generate indexed fused multiply accumulate
8103/// instructions with an additional negation of the accumulator
8107 DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
8108 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
8109 assert(IdxMulOpd == 1);
8110
8111 Register NewVR =
8112 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
8113
8114 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8115 FMAInstKind::Indexed, &NewVR);
8116}
8117
8118/// genMaddR - Generate madd instruction and combine mul and add using
8119/// an extra virtual register
8120/// Example - an ADD intermediate needs to be stored in a register:
8121/// MUL I=A,B,0
8122/// ADD R,I,Imm
8123/// ==> ORR V, ZR, Imm
8124/// ==> MADD R,A,B,V
8125/// \param MF Containing MachineFunction
8126/// \param MRI Register information
8127/// \param TII Target information
8128/// \param Root is the ADD instruction
8129/// \param [out] InsInstrs is a vector of machine instructions and will
8130/// contain the generated madd instruction
8131/// \param IdxMulOpd is index of operand in Root that is the result of
8132/// the MUL. In the example above IdxMulOpd is 1.
8133/// \param MaddOpc the opcode of the madd instruction
8134/// \param VR is a virtual register that holds the value of an ADD operand
8135/// (V in the example above).
8136/// \param RC Register class of operands
8138 const TargetInstrInfo *TII, MachineInstr &Root,
8140 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
8141 const TargetRegisterClass *RC) {
8142 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
8143
8144 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
8145 Register ResultReg = Root.getOperand(0).getReg();
8146 Register SrcReg0 = MUL->getOperand(1).getReg();
8147 bool Src0IsKill = MUL->getOperand(1).isKill();
8148 Register SrcReg1 = MUL->getOperand(2).getReg();
8149 bool Src1IsKill = MUL->getOperand(2).isKill();
8150
8151 if (ResultReg.isVirtual())
8152 MRI.constrainRegClass(ResultReg, RC);
8153 if (SrcReg0.isVirtual())
8154 MRI.constrainRegClass(SrcReg0, RC);
8155 if (SrcReg1.isVirtual())
8156 MRI.constrainRegClass(SrcReg1, RC);
8158 MRI.constrainRegClass(VR, RC);
8159
8161 BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8162 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8163 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8164 .addReg(VR);
8165 // Insert the MADD
8166 InsInstrs.push_back(MIB);
8167 return MUL;
8168}
8169
8170/// Do the following transformation
8171/// A - (B + C) ==> (A - B) - C
8172/// A - (B + C) ==> (A - C) - B
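/// For example, with IdxOpd1 == 1 (placeholder registers):
///   %t = ADDWrr %b, %c
///   %r = SUBWrr %a, %t
///   ==> %v = SUBWrr %a, %b
///       %r = SUBWrr %v, %c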
8174 const TargetInstrInfo *TII, MachineInstr &Root,
8177 unsigned IdxOpd1,
8178 DenseMap<Register, unsigned> &InstrIdxForVirtReg) {
8179 assert(IdxOpd1 == 1 || IdxOpd1 == 2);
8180 unsigned IdxOtherOpd = IdxOpd1 == 1 ? 2 : 1;
8181 MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
8182
8183 Register ResultReg = Root.getOperand(0).getReg();
8184 Register RegA = Root.getOperand(1).getReg();
8185 bool RegAIsKill = Root.getOperand(1).isKill();
8186 Register RegB = AddMI->getOperand(IdxOpd1).getReg();
8187 bool RegBIsKill = AddMI->getOperand(IdxOpd1).isKill();
8188 Register RegC = AddMI->getOperand(IdxOtherOpd).getReg();
8189 bool RegCIsKill = AddMI->getOperand(IdxOtherOpd).isKill();
8190 Register NewVR =
8191 MRI.createVirtualRegister(MRI.getRegClass(Root.getOperand(2).getReg()));
8192
8193 unsigned Opcode = Root.getOpcode();
8194 if (Opcode == AArch64::SUBSWrr)
8195 Opcode = AArch64::SUBWrr;
8196 else if (Opcode == AArch64::SUBSXrr)
8197 Opcode = AArch64::SUBXrr;
8198 else
8199 assert((Opcode == AArch64::SUBWrr || Opcode == AArch64::SUBXrr) &&
8200 "Unexpected instruction opcode.");
8201
8202 uint32_t Flags = Root.mergeFlagsWith(*AddMI);
8203 Flags &= ~MachineInstr::NoSWrap;
8204 Flags &= ~MachineInstr::NoUWrap;
8205
8206 MachineInstrBuilder MIB1 =
8207 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), NewVR)
8208 .addReg(RegA, getKillRegState(RegAIsKill))
8209 .addReg(RegB, getKillRegState(RegBIsKill))
8210 .setMIFlags(Flags);
8211 MachineInstrBuilder MIB2 =
8212 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), ResultReg)
8213 .addReg(NewVR, getKillRegState(true))
8214 .addReg(RegC, getKillRegState(RegCIsKill))
8215 .setMIFlags(Flags);
8216
8217 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8218 InsInstrs.push_back(MIB1);
8219 InsInstrs.push_back(MIB2);
8220 DelInstrs.push_back(AddMI);
8221 DelInstrs.push_back(&Root);
8222}
8223
8224unsigned AArch64InstrInfo::getReduceOpcodeForAccumulator(
8225 unsigned int AccumulatorOpCode) const {
8226 switch (AccumulatorOpCode) {
8227 case AArch64::UABALB_ZZZ_D:
8228 case AArch64::SABALB_ZZZ_D:
8229 case AArch64::UABALT_ZZZ_D:
8230 case AArch64::SABALT_ZZZ_D:
8231 return AArch64::ADD_ZZZ_D;
8232 case AArch64::UABALB_ZZZ_H:
8233 case AArch64::SABALB_ZZZ_H:
8234 case AArch64::UABALT_ZZZ_H:
8235 case AArch64::SABALT_ZZZ_H:
8236 return AArch64::ADD_ZZZ_H;
8237 case AArch64::UABALB_ZZZ_S:
8238 case AArch64::SABALB_ZZZ_S:
8239 case AArch64::UABALT_ZZZ_S:
8240 case AArch64::SABALT_ZZZ_S:
8241 return AArch64::ADD_ZZZ_S;
8242 case AArch64::UABALv16i8_v8i16:
8243 case AArch64::SABALv8i8_v8i16:
8244 case AArch64::SABAv8i16:
8245 case AArch64::UABAv8i16:
8246 return AArch64::ADDv8i16;
8247 case AArch64::SABALv2i32_v2i64:
8248 case AArch64::UABALv2i32_v2i64:
8249 case AArch64::SABALv4i32_v2i64:
8250 return AArch64::ADDv2i64;
8251 case AArch64::UABALv4i16_v4i32:
8252 case AArch64::SABALv4i16_v4i32:
8253 case AArch64::SABALv8i16_v4i32:
8254 case AArch64::SABAv4i32:
8255 case AArch64::UABAv4i32:
8256 return AArch64::ADDv4i32;
8257 case AArch64::UABALv4i32_v2i64:
8258 return AArch64::ADDv2i64;
8259 case AArch64::UABALv8i16_v4i32:
8260 return AArch64::ADDv4i32;
8261 case AArch64::UABALv8i8_v8i16:
8262 case AArch64::SABALv16i8_v8i16:
8263 return AArch64::ADDv8i16;
8264 case AArch64::UABAv16i8:
8265 case AArch64::SABAv16i8:
8266 return AArch64::ADDv16i8;
8267 case AArch64::UABAv4i16:
8268 case AArch64::SABAv4i16:
8269 return AArch64::ADDv4i16;
8270 case AArch64::UABAv2i32:
8271 case AArch64::SABAv2i32:
8272 return AArch64::ADDv2i32;
8273 case AArch64::UABAv8i8:
8274 case AArch64::SABAv8i8:
8275 return AArch64::ADDv8i8;
8276 default:
8277 llvm_unreachable("Unknown accumulator opcode");
8278 }
8279}
8280
8281/// When getMachineCombinerPatterns() finds potential patterns,
8282/// this function generates the instructions that could replace the
8283/// original code sequence
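/// For example, for the integer multiply-add patterns (see the MADD cases
/// below), the pair
///   MUL I=A,B,0
///   ADD R,I,C
/// becomes a single
///   MADD R,A,B,C
/// appended to InsInstrs, while the instructions it replaces are collected in
/// DelInstrs for the machine combiner to remove.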
8284void AArch64InstrInfo::genAlternativeCodeSequence(
8285 MachineInstr &Root, unsigned Pattern,
8288 DenseMap<Register, unsigned> &InstrIdxForVirtReg) const {
8289 MachineBasicBlock &MBB = *Root.getParent();
8290 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
8291 MachineFunction &MF = *MBB.getParent();
8292 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
8293
8294 MachineInstr *MUL = nullptr;
8295 const TargetRegisterClass *RC;
8296 unsigned Opc;
8297 switch (Pattern) {
8298 default:
8299 // Reassociate instructions.
8300 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
8301 DelInstrs, InstrIdxForVirtReg);
8302 return;
8304 // A - (B + C)
8305 // ==> (A - B) - C
8306 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 1,
8307 InstrIdxForVirtReg);
8308 return;
8310 // A - (B + C)
8311 // ==> (A - C) - B
8312 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 2,
8313 InstrIdxForVirtReg);
8314 return;
8317 // MUL I=A,B,0
8318 // ADD R,I,C
8319 // ==> MADD R,A,B,C
8320 // --- Create(MADD);
8322 Opc = AArch64::MADDWrrr;
8323 RC = &AArch64::GPR32RegClass;
8324 } else {
8325 Opc = AArch64::MADDXrrr;
8326 RC = &AArch64::GPR64RegClass;
8327 }
8328 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8329 break;
8332 // MUL I=A,B,0
8333 // ADD R,C,I
8334 // ==> MADD R,A,B,C
8335 // --- Create(MADD);
8337 Opc = AArch64::MADDWrrr;
8338 RC = &AArch64::GPR32RegClass;
8339 } else {
8340 Opc = AArch64::MADDXrrr;
8341 RC = &AArch64::GPR64RegClass;
8342 }
8343 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8344 break;
8349 // MUL I=A,B,0
8350 // ADD/SUB R,I,Imm
8351 // ==> MOV V, Imm/-Imm
8352 // ==> MADD R,A,B,V
8353 // --- Create(MADD);
8354 const TargetRegisterClass *RC;
8355 unsigned BitSize, MovImm;
8358 MovImm = AArch64::MOVi32imm;
8359 RC = &AArch64::GPR32spRegClass;
8360 BitSize = 32;
8361 Opc = AArch64::MADDWrrr;
8362 RC = &AArch64::GPR32RegClass;
8363 } else {
8364 MovImm = AArch64::MOVi64imm;
8365 RC = &AArch64::GPR64spRegClass;
8366 BitSize = 64;
8367 Opc = AArch64::MADDXrrr;
8368 RC = &AArch64::GPR64RegClass;
8369 }
8370 Register NewVR = MRI.createVirtualRegister(RC);
8371 uint64_t Imm = Root.getOperand(2).getImm();
8372
8373 if (Root.getOperand(3).isImm()) {
8374 unsigned Val = Root.getOperand(3).getImm();
8375 Imm = Imm << Val;
8376 }
8377 bool IsSub = Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1 ||
8379 uint64_t UImm = SignExtend64(IsSub ? -Imm : Imm, BitSize);
8380 // Check that the immediate can be composed via a single instruction.
8382 AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
8383 if (Insn.size() != 1)
8384 return;
8385 MachineInstrBuilder MIB1 =
8386 BuildMI(MF, MIMetadata(Root), TII->get(MovImm), NewVR)
8387 .addImm(IsSub ? -Imm : Imm);
8388 InsInstrs.push_back(MIB1);
8389 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8390 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
8391 break;
8392 }
8395 // MUL I=A,B,0
8396 // SUB R,I, C
8397 // ==> SUB V, 0, C
8398 // ==> MADD R,A,B,V // = -C + A*B
8399 // --- Create(MADD);
8400 const TargetRegisterClass *SubRC;
8401 unsigned SubOpc, ZeroReg;
8403 SubOpc = AArch64::SUBWrr;
8404 SubRC = &AArch64::GPR32spRegClass;
8405 ZeroReg = AArch64::WZR;
8406 Opc = AArch64::MADDWrrr;
8407 RC = &AArch64::GPR32RegClass;
8408 } else {
8409 SubOpc = AArch64::SUBXrr;
8410 SubRC = &AArch64::GPR64spRegClass;
8411 ZeroReg = AArch64::XZR;
8412 Opc = AArch64::MADDXrrr;
8413 RC = &AArch64::GPR64RegClass;
8414 }
8415 Register NewVR = MRI.createVirtualRegister(SubRC);
8416 // SUB NewVR, 0, C
8417 MachineInstrBuilder MIB1 =
8418 BuildMI(MF, MIMetadata(Root), TII->get(SubOpc), NewVR)
8419 .addReg(ZeroReg)
8420 .add(Root.getOperand(2));
8421 InsInstrs.push_back(MIB1);
8422 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8423 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
8424 break;
8425 }
8428 // MUL I=A,B,0
8429 // SUB R,C,I
8430 // ==> MSUB R,A,B,C (computes C - A*B)
8431 // --- Create(MSUB);
8433 Opc = AArch64::MSUBWrrr;
8434 RC = &AArch64::GPR32RegClass;
8435 } else {
8436 Opc = AArch64::MSUBXrrr;
8437 RC = &AArch64::GPR64RegClass;
8438 }
8439 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8440 break;
8442 Opc = AArch64::MLAv8i8;
8443 RC = &AArch64::FPR64RegClass;
8444 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8445 break;
8447 Opc = AArch64::MLAv8i8;
8448 RC = &AArch64::FPR64RegClass;
8449 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8450 break;
8452 Opc = AArch64::MLAv16i8;
8453 RC = &AArch64::FPR128RegClass;
8454 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8455 break;
8457 Opc = AArch64::MLAv16i8;
8458 RC = &AArch64::FPR128RegClass;
8459 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8460 break;
8462 Opc = AArch64::MLAv4i16;
8463 RC = &AArch64::FPR64RegClass;
8464 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8465 break;
8467 Opc = AArch64::MLAv4i16;
8468 RC = &AArch64::FPR64RegClass;
8469 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8470 break;
8472 Opc = AArch64::MLAv8i16;
8473 RC = &AArch64::FPR128RegClass;
8474 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8475 break;
8477 Opc = AArch64::MLAv8i16;
8478 RC = &AArch64::FPR128RegClass;
8479 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8480 break;
8482 Opc = AArch64::MLAv2i32;
8483 RC = &AArch64::FPR64RegClass;
8484 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8485 break;
8487 Opc = AArch64::MLAv2i32;
8488 RC = &AArch64::FPR64RegClass;
8489 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8490 break;
8492 Opc = AArch64::MLAv4i32;
8493 RC = &AArch64::FPR128RegClass;
8494 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8495 break;
8497 Opc = AArch64::MLAv4i32;
8498 RC = &AArch64::FPR128RegClass;
8499 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8500 break;
8501
8503 Opc = AArch64::MLAv8i8;
8504 RC = &AArch64::FPR64RegClass;
8505 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8506 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
8507 RC);
8508 break;
8510 Opc = AArch64::MLSv8i8;
8511 RC = &AArch64::FPR64RegClass;
8512 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8513 break;
8515 Opc = AArch64::MLAv16i8;
8516 RC = &AArch64::FPR128RegClass;
8517 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8518 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
8519 RC);
8520 break;
8522 Opc = AArch64::MLSv16i8;
8523 RC = &AArch64::FPR128RegClass;
8524 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8525 break;
8527 Opc = AArch64::MLAv4i16;
8528 RC = &AArch64::FPR64RegClass;
8529 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8530 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
8531 RC);
8532 break;
8534 Opc = AArch64::MLSv4i16;
8535 RC = &AArch64::FPR64RegClass;
8536 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8537 break;
8539 Opc = AArch64::MLAv8i16;
8540 RC = &AArch64::FPR128RegClass;
8541 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8542 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
8543 RC);
8544 break;
8546 Opc = AArch64::MLSv8i16;
8547 RC = &AArch64::FPR128RegClass;
8548 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8549 break;
8551 Opc = AArch64::MLAv2i32;
8552 RC = &AArch64::FPR64RegClass;
8553 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8554 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
8555 RC);
8556 break;
8558 Opc = AArch64::MLSv2i32;
8559 RC = &AArch64::FPR64RegClass;
8560 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8561 break;
8563 Opc = AArch64::MLAv4i32;
8564 RC = &AArch64::FPR128RegClass;
8565 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8566 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
8567 RC);
8568 break;
8570 Opc = AArch64::MLSv4i32;
8571 RC = &AArch64::FPR128RegClass;
8572 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8573 break;
8574
8576 Opc = AArch64::MLAv4i16_indexed;
8577 RC = &AArch64::FPR64RegClass;
8578 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8579 break;
8581 Opc = AArch64::MLAv4i16_indexed;
8582 RC = &AArch64::FPR64RegClass;
8583 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8584 break;
8586 Opc = AArch64::MLAv8i16_indexed;
8587 RC = &AArch64::FPR128RegClass;
8588 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8589 break;
8591 Opc = AArch64::MLAv8i16_indexed;
8592 RC = &AArch64::FPR128RegClass;
8593 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8594 break;
8596 Opc = AArch64::MLAv2i32_indexed;
8597 RC = &AArch64::FPR64RegClass;
8598 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8599 break;
8601 Opc = AArch64::MLAv2i32_indexed;
8602 RC = &AArch64::FPR64RegClass;
8603 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8604 break;
8606 Opc = AArch64::MLAv4i32_indexed;
8607 RC = &AArch64::FPR128RegClass;
8608 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8609 break;
8611 Opc = AArch64::MLAv4i32_indexed;
8612 RC = &AArch64::FPR128RegClass;
8613 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8614 break;
8615
8617 Opc = AArch64::MLAv4i16_indexed;
8618 RC = &AArch64::FPR64RegClass;
8619 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8620 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
8621 RC);
8622 break;
8624 Opc = AArch64::MLSv4i16_indexed;
8625 RC = &AArch64::FPR64RegClass;
8626 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8627 break;
8629 Opc = AArch64::MLAv8i16_indexed;
8630 RC = &AArch64::FPR128RegClass;
8631 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8632 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
8633 RC);
8634 break;
8636 Opc = AArch64::MLSv8i16_indexed;
8637 RC = &AArch64::FPR128RegClass;
8638 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8639 break;
8641 Opc = AArch64::MLAv2i32_indexed;
8642 RC = &AArch64::FPR64RegClass;
8643 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8644 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
8645 RC);
8646 break;
8648 Opc = AArch64::MLSv2i32_indexed;
8649 RC = &AArch64::FPR64RegClass;
8650 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8651 break;
8653 Opc = AArch64::MLAv4i32_indexed;
8654 RC = &AArch64::FPR128RegClass;
8655 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8656 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
8657 RC);
8658 break;
8660 Opc = AArch64::MLSv4i32_indexed;
8661 RC = &AArch64::FPR128RegClass;
8662 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8663 break;
8664
8665 // Floating Point Support
8667 Opc = AArch64::FMADDHrrr;
8668 RC = &AArch64::FPR16RegClass;
8669 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8670 break;
8672 Opc = AArch64::FMADDSrrr;
8673 RC = &AArch64::FPR32RegClass;
8674 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8675 break;
8677 Opc = AArch64::FMADDDrrr;
8678 RC = &AArch64::FPR64RegClass;
8679 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8680 break;
8681
8683 Opc = AArch64::FMADDHrrr;
8684 RC = &AArch64::FPR16RegClass;
8685 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8686 break;
8688 Opc = AArch64::FMADDSrrr;
8689 RC = &AArch64::FPR32RegClass;
8690 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8691 break;
8693 Opc = AArch64::FMADDDrrr;
8694 RC = &AArch64::FPR64RegClass;
8695 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8696 break;
8697
8699 Opc = AArch64::FMLAv1i32_indexed;
8700 RC = &AArch64::FPR32RegClass;
8701 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8703 break;
8705 Opc = AArch64::FMLAv1i32_indexed;
8706 RC = &AArch64::FPR32RegClass;
8707 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8709 break;
8710
8712 Opc = AArch64::FMLAv1i64_indexed;
8713 RC = &AArch64::FPR64RegClass;
8714 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8716 break;
8718 Opc = AArch64::FMLAv1i64_indexed;
8719 RC = &AArch64::FPR64RegClass;
8720 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8722 break;
8723
8725 RC = &AArch64::FPR64RegClass;
8726 Opc = AArch64::FMLAv4i16_indexed;
8727 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8729 break;
8731 RC = &AArch64::FPR64RegClass;
8732 Opc = AArch64::FMLAv4f16;
8733 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8735 break;
8737 RC = &AArch64::FPR64RegClass;
8738 Opc = AArch64::FMLAv4i16_indexed;
8739 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8741 break;
8743 RC = &AArch64::FPR64RegClass;
8744 Opc = AArch64::FMLAv4f16;
8745 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8747 break;
8748
8751 RC = &AArch64::FPR64RegClass;
8753 Opc = AArch64::FMLAv2i32_indexed;
8754 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8756 } else {
8757 Opc = AArch64::FMLAv2f32;
8758 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8760 }
8761 break;
8764 RC = &AArch64::FPR64RegClass;
8766 Opc = AArch64::FMLAv2i32_indexed;
8767 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8769 } else {
8770 Opc = AArch64::FMLAv2f32;
8771 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8773 }
8774 break;
8775
8777 RC = &AArch64::FPR128RegClass;
8778 Opc = AArch64::FMLAv8i16_indexed;
8779 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8781 break;
8783 RC = &AArch64::FPR128RegClass;
8784 Opc = AArch64::FMLAv8f16;
8785 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8787 break;
8789 RC = &AArch64::FPR128RegClass;
8790 Opc = AArch64::FMLAv8i16_indexed;
8791 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8793 break;
8795 RC = &AArch64::FPR128RegClass;
8796 Opc = AArch64::FMLAv8f16;
8797 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8799 break;
8800
8803 RC = &AArch64::FPR128RegClass;
8805 Opc = AArch64::FMLAv2i64_indexed;
8806 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8808 } else {
8809 Opc = AArch64::FMLAv2f64;
8810 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8812 }
8813 break;
8816 RC = &AArch64::FPR128RegClass;
8818 Opc = AArch64::FMLAv2i64_indexed;
8819 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8821 } else {
8822 Opc = AArch64::FMLAv2f64;
8823 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8825 }
8826 break;
8827
8830 RC = &AArch64::FPR128RegClass;
8832 Opc = AArch64::FMLAv4i32_indexed;
8833 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8835 } else {
8836 Opc = AArch64::FMLAv4f32;
8837 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8839 }
8840 break;
8841
8844 RC = &AArch64::FPR128RegClass;
8846 Opc = AArch64::FMLAv4i32_indexed;
8847 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8849 } else {
8850 Opc = AArch64::FMLAv4f32;
8851 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8853 }
8854 break;
8855
8857 Opc = AArch64::FNMSUBHrrr;
8858 RC = &AArch64::FPR16RegClass;
8859 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8860 break;
8862 Opc = AArch64::FNMSUBSrrr;
8863 RC = &AArch64::FPR32RegClass;
8864 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8865 break;
8867 Opc = AArch64::FNMSUBDrrr;
8868 RC = &AArch64::FPR64RegClass;
8869 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8870 break;
8871
8873 Opc = AArch64::FNMADDHrrr;
8874 RC = &AArch64::FPR16RegClass;
8875 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8876 break;
8878 Opc = AArch64::FNMADDSrrr;
8879 RC = &AArch64::FPR32RegClass;
8880 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8881 break;
8883 Opc = AArch64::FNMADDDrrr;
8884 RC = &AArch64::FPR64RegClass;
8885 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8886 break;
8887
8889 Opc = AArch64::FMSUBHrrr;
8890 RC = &AArch64::FPR16RegClass;
8891 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8892 break;
8894 Opc = AArch64::FMSUBSrrr;
8895 RC = &AArch64::FPR32RegClass;
8896 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8897 break;
8899 Opc = AArch64::FMSUBDrrr;
8900 RC = &AArch64::FPR64RegClass;
8901 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8902 break;
8903
8905 Opc = AArch64::FMLSv1i32_indexed;
8906 RC = &AArch64::FPR32RegClass;
8907 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8909 break;
8910
8912 Opc = AArch64::FMLSv1i64_indexed;
8913 RC = &AArch64::FPR64RegClass;
8914 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8916 break;
8917
8920 RC = &AArch64::FPR64RegClass;
8921 Register NewVR = MRI.createVirtualRegister(RC);
8922 MachineInstrBuilder MIB1 =
8923 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f16), NewVR)
8924 .add(Root.getOperand(2));
8925 InsInstrs.push_back(MIB1);
8926 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8928 Opc = AArch64::FMLAv4f16;
8929 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8930 FMAInstKind::Accumulator, &NewVR);
8931 } else {
8932 Opc = AArch64::FMLAv4i16_indexed;
8933 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8934 FMAInstKind::Indexed, &NewVR);
8935 }
8936 break;
8937 }
8939 RC = &AArch64::FPR64RegClass;
8940 Opc = AArch64::FMLSv4f16;
8941 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8943 break;
8945 RC = &AArch64::FPR64RegClass;
8946 Opc = AArch64::FMLSv4i16_indexed;
8947 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8949 break;
8950
8953 RC = &AArch64::FPR64RegClass;
8955 Opc = AArch64::FMLSv2i32_indexed;
8956 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8958 } else {
8959 Opc = AArch64::FMLSv2f32;
8960 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8962 }
8963 break;
8964
8967 RC = &AArch64::FPR128RegClass;
8968 Register NewVR = MRI.createVirtualRegister(RC);
8969 MachineInstrBuilder MIB1 =
8970 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv8f16), NewVR)
8971 .add(Root.getOperand(2));
8972 InsInstrs.push_back(MIB1);
8973 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8975 Opc = AArch64::FMLAv8f16;
8976 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8977 FMAInstKind::Accumulator, &NewVR);
8978 } else {
8979 Opc = AArch64::FMLAv8i16_indexed;
8980 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8981 FMAInstKind::Indexed, &NewVR);
8982 }
8983 break;
8984 }
8986 RC = &AArch64::FPR128RegClass;
8987 Opc = AArch64::FMLSv8f16;
8988 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8990 break;
8992 RC = &AArch64::FPR128RegClass;
8993 Opc = AArch64::FMLSv8i16_indexed;
8994 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8996 break;
8997
9000 RC = &AArch64::FPR128RegClass;
9002 Opc = AArch64::FMLSv2i64_indexed;
9003 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9005 } else {
9006 Opc = AArch64::FMLSv2f64;
9007 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9009 }
9010 break;
9011
9014 RC = &AArch64::FPR128RegClass;
9016 Opc = AArch64::FMLSv4i32_indexed;
9017 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9019 } else {
9020 Opc = AArch64::FMLSv4f32;
9021 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9023 }
9024 break;
9027 RC = &AArch64::FPR64RegClass;
9028 Register NewVR = MRI.createVirtualRegister(RC);
9029 MachineInstrBuilder MIB1 =
9030 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f32), NewVR)
9031 .add(Root.getOperand(2));
9032 InsInstrs.push_back(MIB1);
9033 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9035 Opc = AArch64::FMLAv2i32_indexed;
9036 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9037 FMAInstKind::Indexed, &NewVR);
9038 } else {
9039 Opc = AArch64::FMLAv2f32;
9040 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9041 FMAInstKind::Accumulator, &NewVR);
9042 }
9043 break;
9044 }
9047 RC = &AArch64::FPR128RegClass;
9048 Register NewVR = MRI.createVirtualRegister(RC);
9049 MachineInstrBuilder MIB1 =
9050 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f32), NewVR)
9051 .add(Root.getOperand(2));
9052 InsInstrs.push_back(MIB1);
9053 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9055 Opc = AArch64::FMLAv4i32_indexed;
9056 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9057 FMAInstKind::Indexed, &NewVR);
9058 } else {
9059 Opc = AArch64::FMLAv4f32;
9060 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9061 FMAInstKind::Accumulator, &NewVR);
9062 }
9063 break;
9064 }
9067 RC = &AArch64::FPR128RegClass;
9068 Register NewVR = MRI.createVirtualRegister(RC);
9069 MachineInstrBuilder MIB1 =
9070 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f64), NewVR)
9071 .add(Root.getOperand(2));
9072 InsInstrs.push_back(MIB1);
9073 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9075 Opc = AArch64::FMLAv2i64_indexed;
9076 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9077 FMAInstKind::Indexed, &NewVR);
9078 } else {
9079 Opc = AArch64::FMLAv2f64;
9080 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9081 FMAInstKind::Accumulator, &NewVR);
9082 }
9083 break;
9084 }
9087 unsigned IdxDupOp =
9089 : 2;
9090 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
9091 &AArch64::FPR128RegClass, MRI);
9092 break;
9093 }
9096 unsigned IdxDupOp =
9098 : 2;
9099 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
9100 &AArch64::FPR128RegClass, MRI);
9101 break;
9102 }
9105 unsigned IdxDupOp =
9107 : 2;
9108 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
9109 &AArch64::FPR128_loRegClass, MRI);
9110 break;
9111 }
9114 unsigned IdxDupOp =
9116 : 2;
9117 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
9118 &AArch64::FPR128RegClass, MRI);
9119 break;
9120 }
9123 unsigned IdxDupOp =
9125 : 2;
9126 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
9127 &AArch64::FPR128_loRegClass, MRI);
9128 break;
9129 }
9131 MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
9132 break;
9133 }
9135 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9136 Pattern, 4);
9137 break;
9138 }
9140 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9141 Pattern, 8);
9142 break;
9143 }
9145 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9146 Pattern, 16);
9147 break;
9148 }
9149
9150 } // end switch (Pattern)
9151 // Record MUL and ADD/SUB for deletion
9152 if (MUL)
9153 DelInstrs.push_back(MUL);
9154 DelInstrs.push_back(&Root);
9155
9156 // Set the flags on the inserted instructions to be the merged flags of the
9157 // instructions that we have combined.
9158 uint32_t Flags = Root.getFlags();
9159 if (MUL)
9160 Flags = Root.mergeFlagsWith(*MUL);
9161 for (auto *MI : InsInstrs)
9162 MI->setFlags(Flags);
9163}
9164
9165/// Replace csincr-branch sequence by simple conditional branch
9166///
9167/// Examples:
9168/// 1. \code
9169/// csinc w9, wzr, wzr, <condition code>
9170/// tbnz w9, #0, 0x44
9171/// \endcode
9172/// to
9173/// \code
9174/// b.<inverted condition code>
9175/// \endcode
9176///
9177/// 2. \code
9178/// csinc w9, wzr, wzr, <condition code>
9179/// tbz w9, #0, 0x44
9180/// \endcode
9181/// to
9182/// \code
9183/// b.<condition code>
9184/// \endcode
9185///
9186/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
9187/// compare's constant operand is power of 2.
9188///
9189/// Examples:
9190/// \code
9191/// and w8, w8, #0x400
9192/// cbnz w8, L1
9193/// \endcode
9194/// to
9195/// \code
9196/// tbnz w8, #10, L1
9197/// \endcode
9198///
9199/// \param MI Conditional Branch
9200/// \return True when the simple conditional branch is generated
9201///
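/// A further illustrative case (register names follow the examples above and
/// are not taken from any particular test): the same fold applies to 64-bit
/// registers, and when the tested bit is below 32 the W form of TB[N]Z is
/// used on the 32-bit sub-register:
/// \code
///   and x8, x8, #0x4
///   cbnz x8, L1
/// \endcode
/// to
/// \code
///   tbnz w8, #2, L1
/// \endcode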
9203 bool IsNegativeBranch = false;
9204 bool IsTestAndBranch = false;
9205 unsigned TargetBBInMI = 0;
9206 switch (MI.getOpcode()) {
9207 default:
9208 llvm_unreachable("Unknown branch instruction?");
9209 case AArch64::Bcc:
9210 case AArch64::CBWPri:
9211 case AArch64::CBXPri:
9212 case AArch64::CBWPrr:
9213 case AArch64::CBXPrr:
9214 return false;
9215 case AArch64::CBZW:
9216 case AArch64::CBZX:
9217 TargetBBInMI = 1;
9218 break;
9219 case AArch64::CBNZW:
9220 case AArch64::CBNZX:
9221 TargetBBInMI = 1;
9222 IsNegativeBranch = true;
9223 break;
9224 case AArch64::TBZW:
9225 case AArch64::TBZX:
9226 TargetBBInMI = 2;
9227 IsTestAndBranch = true;
9228 break;
9229 case AArch64::TBNZW:
9230 case AArch64::TBNZX:
9231 TargetBBInMI = 2;
9232 IsNegativeBranch = true;
9233 IsTestAndBranch = true;
9234 break;
9235 }
9236 // So we increment a zero register and test for bits other
9237 // than bit 0? Conservatively bail out in case the verifier
9238 // missed this case.
9239 if (IsTestAndBranch && MI.getOperand(1).getImm())
9240 return false;
9241
9242 // Find Definition.
9243 assert(MI.getParent() && "Incomplete machine instruction\n");
9244 MachineBasicBlock *MBB = MI.getParent();
9245 MachineFunction *MF = MBB->getParent();
9247 Register VReg = MI.getOperand(0).getReg();
9248 if (!VReg.isVirtual())
9249 return false;
9250
9251 MachineInstr *DefMI = MRI->getVRegDef(VReg);
9252
9253 // Look through COPY instructions to find definition.
9254 while (DefMI->isCopy()) {
9255 Register CopyVReg = DefMI->getOperand(1).getReg();
9256 if (!MRI->hasOneNonDBGUse(CopyVReg))
9257 return false;
9258 if (!MRI->hasOneDef(CopyVReg))
9259 return false;
9260 DefMI = MRI->getVRegDef(CopyVReg);
9261 }
9262
9263 switch (DefMI->getOpcode()) {
9264 default:
9265 return false;
9266 // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
9267 case AArch64::ANDWri:
9268 case AArch64::ANDXri: {
9269 if (IsTestAndBranch)
9270 return false;
9271 if (DefMI->getParent() != MBB)
9272 return false;
9273 if (!MRI->hasOneNonDBGUse(VReg))
9274 return false;
9275
9276 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
9278 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
9279 if (!isPowerOf2_64(Mask))
9280 return false;
9281
9282 MachineOperand &MO = DefMI->getOperand(1);
9283 Register NewReg = MO.getReg();
9284 if (!NewReg.isVirtual())
9285 return false;
9286
9287 assert(!MRI->def_empty(NewReg) && "Register must be defined.");
9288
9289 MachineBasicBlock &RefToMBB = *MBB;
9290 MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
9291 DebugLoc DL = MI.getDebugLoc();
9292 unsigned Imm = Log2_64(Mask);
9293 unsigned Opc = (Imm < 32)
9294 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
9295 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
9296 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
9297 .addReg(NewReg)
9298 .addImm(Imm)
9299 .addMBB(TBB);
9300 // The register now lives on to the new TBZ/TBNZ.
9301 MO.setIsKill(false);
9302
9303 // For bit positions smaller than 32, we must use the 32-bit (W)
9304 // variant in all cases, since the 64-bit variant cannot encode
9305 // them.
9306 // Therefore, if the input register is 64-bit, we need to take its
9307 // 32-bit sub-register.
9308 if (!Is32Bit && Imm < 32)
9309 NewMI->getOperand(0).setSubReg(AArch64::sub_32);
9310 MI.eraseFromParent();
9311 return true;
9312 }
9313 // Look for CSINC
9314 case AArch64::CSINCWr:
9315 case AArch64::CSINCXr: {
9316 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
9317 DefMI->getOperand(2).getReg() == AArch64::WZR) &&
9318 !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
9319 DefMI->getOperand(2).getReg() == AArch64::XZR))
9320 return false;
9321
9322 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
9323 true) != -1)
9324 return false;
9325
9326 AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
9327 // Convert only when the condition code is not modified between
9328 // the CSINC and the branch. The CC may be used by other
9329 // instructions in between.
9331 return false;
9332 MachineBasicBlock &RefToMBB = *MBB;
9333 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
9334 DebugLoc DL = MI.getDebugLoc();
9335 if (IsNegativeBranch)
9337 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
9338 MI.eraseFromParent();
9339 return true;
9340 }
9341 }
9342}
9343
9344std::pair<unsigned, unsigned>
9345AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
9346 const unsigned Mask = AArch64II::MO_FRAGMENT;
9347 return std::make_pair(TF & Mask, TF & ~Mask);
9348}
9349
9351AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
9352 using namespace AArch64II;
9353
9354 static const std::pair<unsigned, const char *> TargetFlags[] = {
9355 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
9356 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
9357 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
9358 {MO_HI12, "aarch64-hi12"}};
9359 return ArrayRef(TargetFlags);
9360}
9361
9363AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
9364 using namespace AArch64II;
9365
9366 static const std::pair<unsigned, const char *> TargetFlags[] = {
9367 {MO_COFFSTUB, "aarch64-coffstub"},
9368 {MO_GOT, "aarch64-got"},
9369 {MO_NC, "aarch64-nc"},
9370 {MO_S, "aarch64-s"},
9371 {MO_TLS, "aarch64-tls"},
9372 {MO_DLLIMPORT, "aarch64-dllimport"},
9373 {MO_PREL, "aarch64-prel"},
9374 {MO_TAGGED, "aarch64-tagged"},
9375 {MO_ARM64EC_CALLMANGLE, "aarch64-arm64ec-callmangle"},
9376 };
9377 return ArrayRef(TargetFlags);
9378}
9379
9381AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
9382 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
9383 {{MOSuppressPair, "aarch64-suppress-pair"},
9384 {MOStridedAccess, "aarch64-strided-access"}};
9385 return ArrayRef(TargetFlags);
9386}
9387
9388/// Constants defining how certain sequences should be outlined.
9389/// This encompasses how an outlined function should be called, and what kind of
9390/// frame should be emitted for that outlined function.
9391///
9392/// \p MachineOutlinerDefault implies that the function should be called with
9393/// a save and restore of LR to the stack.
9394///
9395/// That is,
9396///
9397/// I1 Save LR OUTLINED_FUNCTION:
9398/// I2 --> BL OUTLINED_FUNCTION I1
9399/// I3 Restore LR I2
9400/// I3
9401/// RET
9402///
9403/// * Call construction overhead: 3 (save + BL + restore)
9404/// * Frame construction overhead: 1 (ret)
9405/// * Requires stack fixups? Yes
9406///
9407/// \p MachineOutlinerTailCall implies that the function is being created from
9408/// a sequence of instructions ending in a return.
9409///
9410/// That is,
9411///
9412/// I1 OUTLINED_FUNCTION:
9413/// I2 --> B OUTLINED_FUNCTION I1
9414/// RET I2
9415/// RET
9416///
9417/// * Call construction overhead: 1 (B)
9418/// * Frame construction overhead: 0 (Return included in sequence)
9419/// * Requires stack fixups? No
9420///
9421/// \p MachineOutlinerNoLRSave implies that the function should be called using
9422/// a BL instruction, but doesn't require LR to be saved and restored. This
9423/// happens when LR is known to be dead.
9424///
9425/// That is,
9426///
9427/// I1 OUTLINED_FUNCTION:
9428/// I2 --> BL OUTLINED_FUNCTION I1
9429/// I3 I2
9430/// I3
9431/// RET
9432///
9433/// * Call construction overhead: 1 (BL)
9434/// * Frame construction overhead: 1 (RET)
9435/// * Requires stack fixups? No
9436///
9437/// \p MachineOutlinerThunk implies that the function is being created from
9438/// a sequence of instructions ending in a call. The outlined function is
9439/// called with a BL instruction, and the outlined function tail-calls the
9440/// original call destination.
9441///
9442/// That is,
9443///
9444/// I1 OUTLINED_FUNCTION:
9445/// I2 --> BL OUTLINED_FUNCTION I1
9446/// BL f I2
9447/// B f
9448/// * Call construction overhead: 1 (BL)
9449/// * Frame construction overhead: 0
9450/// * Requires stack fixups? No
9451///
9452/// \p MachineOutlinerRegSave implies that the function should be called with a
9453/// save and restore of LR to an available register. This allows us to avoid
9454/// stack fixups. Note that this outlining variant is compatible with the
9455/// NoLRSave case.
9456///
9457/// That is,
9458///
9459/// I1 Save LR OUTLINED_FUNCTION:
9460/// I2 --> BL OUTLINED_FUNCTION I1
9461/// I3 Restore LR I2
9462/// I3
9463/// RET
9464///
9465/// * Call construction overhead: 3 (save + BL + restore)
9466/// * Frame construction overhead: 1 (ret)
9467/// * Requires stack fixups? No
9469 MachineOutlinerDefault, /// Emit a save, restore, call, and return.
9470 MachineOutlinerTailCall, /// Only emit a branch.
9471 MachineOutlinerNoLRSave, /// Emit a call and return.
9472 MachineOutlinerThunk, /// Emit a call and tail-call.
9473 MachineOutlinerRegSave /// Same as default, but save to a register.
9474};
9475
9481
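/// Illustrative only: if a scratch register (say, x9) is found to be free
/// across and inside the candidate, the outlined call can avoid touching the
/// stack entirely, using ORRXrs-based register moves:
/// \code
///   mov x9, lr
///   bl  OUTLINED_FUNCTION_N
///   mov lr, x9
/// \endcode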
9483AArch64InstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
9484 MachineFunction *MF = C.getMF();
9485 const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
9486 const AArch64RegisterInfo *ARI =
9487 static_cast<const AArch64RegisterInfo *>(&TRI);
9488 // Check if there is an available register across the sequence that we can
9489 // use.
9490 for (unsigned Reg : AArch64::GPR64RegClass) {
9491 if (!ARI->isReservedReg(*MF, Reg) &&
9492 Reg != AArch64::LR && // LR is not reserved, but don't use it.
9493 Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
9494 Reg != AArch64::X17 && // Ditto for X17.
9495 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
9496 C.isAvailableInsideSeq(Reg, TRI))
9497 return Reg;
9498 }
9499 return Register();
9500}
9501
9502static bool
9504 const outliner::Candidate &b) {
9505 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
9506 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
9507
9508 return MFIa->shouldSignReturnAddress(false) == MFIb->shouldSignReturnAddress(false) &&
9509 MFIa->shouldSignReturnAddress(true) == MFIb->shouldSignReturnAddress(true);
9510}
9511
9512static bool
9514 const outliner::Candidate &b) {
9515 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
9516 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
9517
9518 return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
9519}
9520
9522 const outliner::Candidate &b) {
9523 const AArch64Subtarget &SubtargetA =
9525 const AArch64Subtarget &SubtargetB =
9526 b.getMF()->getSubtarget<AArch64Subtarget>();
9527 return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
9528}
9529
9530std::optional<std::unique_ptr<outliner::OutlinedFunction>>
9531AArch64InstrInfo::getOutliningCandidateInfo(
9532 const MachineModuleInfo &MMI,
9533 std::vector<outliner::Candidate> &RepeatedSequenceLocs,
9534 unsigned MinRepeats) const {
9535 unsigned SequenceSize = 0;
9536 for (auto &MI : RepeatedSequenceLocs[0])
9537 SequenceSize += getInstSizeInBytes(MI);
9538
9539 unsigned NumBytesToCreateFrame = 0;
9540
9541 // We only allow outlining for functions having exactly matching return
9542 // address signing attributes, i.e., all share the same value for the
9543 // attribute "sign-return-address" and all share the same type of key they
9544 // are signed with.
9545 // Additionally we require all functions to simultaneously either support
9546 // v8.3a features or not. Otherwise an outlined function could get signed
9547 // using dedicated v8.3 instructions and a call from a function that doesn't
9548 // support v8.3 instructions would therefore be invalid.
9549 if (std::adjacent_find(
9550 RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
9551 [](const outliner::Candidate &a, const outliner::Candidate &b) {
9552 // Return true if a and b are non-equal w.r.t. return address
9553 // signing or support of v8.3a features
9554 if (outliningCandidatesSigningScopeConsensus(a, b) &&
9555 outliningCandidatesSigningKeyConsensus(a, b) &&
9556 outliningCandidatesV8_3OpsConsensus(a, b)) {
9557 return false;
9558 }
9559 return true;
9560 }) != RepeatedSequenceLocs.end()) {
9561 return std::nullopt;
9562 }
9563
9564 // Since at this point all candidates agree on their return address signing
9565 // picking just one is fine. If the candidate functions potentially sign their
9566 // return addresses, the outlined function should do the same. Note that in
9567 // the case of "sign-return-address"="non-leaf" this is an assumption: It is
9568 // not certainly true that the outlined function will have to sign its return
9569 // address but this decision is made later, when the decision to outline
9570 // has already been made.
9571 // The same holds for the number of additional instructions we need: On
9572 // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
9573 // necessary. However, at this point we don't know if the outlined function
9574 // will have a RET instruction so we assume the worst.
9575 const TargetRegisterInfo &TRI = getRegisterInfo();
9576 // Performing a tail call may require extra checks when PAuth is enabled.
9577 // If PAuth is disabled, set it to zero for uniformity.
9578 unsigned NumBytesToCheckLRInTCEpilogue = 0;
9579 if (RepeatedSequenceLocs[0]
9580 .getMF()
9581 ->getInfo<AArch64FunctionInfo>()
9582 ->shouldSignReturnAddress(true)) {
9583 // One PAC and one AUT instructions
9584 NumBytesToCreateFrame += 8;
9585
9586 // PAuth is enabled - set extra tail call cost, if any.
9587 auto LRCheckMethod = Subtarget.getAuthenticatedLRCheckMethod(
9588 *RepeatedSequenceLocs[0].getMF());
9589 NumBytesToCheckLRInTCEpilogue =
9591 // Checking the authenticated LR value may significantly impact
9592 // SequenceSize, so account for it for more precise results.
9593 if (isTailCallReturnInst(RepeatedSequenceLocs[0].back()))
9594 SequenceSize += NumBytesToCheckLRInTCEpilogue;
9595
9596 // We have to check whether SP-modifying instructions would get outlined.
9597 // If so, we only allow outlining if SP is unchanged overall: matching
9598 // sub and add instructions are okay to outline; all other SP modifications
9599 // are not.
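// For example (illustrative): a matched pair such as
//   sub sp, sp, #16 ... add sp, sp, #16
// nets to zero and may be outlined, while an unmatched
//   add sp, sp, #16
// or any other write to SP (e.g. "mov sp, x0") is rejected.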
9600 auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
9601 int SPValue = 0;
9602 for (auto &MI : C) {
9603 if (MI.modifiesRegister(AArch64::SP, &TRI)) {
9604 switch (MI.getOpcode()) {
9605 case AArch64::ADDXri:
9606 case AArch64::ADDWri:
9607 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
9608 assert(MI.getOperand(2).isImm() &&
9609 "Expected operand to be immediate");
9610 assert(MI.getOperand(1).isReg() &&
9611 "Expected operand to be a register");
9612 // Check if the add just increments sp. If so, we search for
9613 // matching sub instructions that decrement sp. If not, the
9614 // modification is illegal
9615 if (MI.getOperand(1).getReg() == AArch64::SP)
9616 SPValue += MI.getOperand(2).getImm();
9617 else
9618 return true;
9619 break;
9620 case AArch64::SUBXri:
9621 case AArch64::SUBWri:
9622 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
9623 assert(MI.getOperand(2).isImm() &&
9624 "Expected operand to be immediate");
9625 assert(MI.getOperand(1).isReg() &&
9626 "Expected operand to be a register");
9627 // Check if the sub just decrements sp. If so, we search for
9628 // matching add instructions that increment sp. If not, the
9629 // modification is illegal
9630 if (MI.getOperand(1).getReg() == AArch64::SP)
9631 SPValue -= MI.getOperand(2).getImm();
9632 else
9633 return true;
9634 break;
9635 default:
9636 return true;
9637 }
9638 }
9639 }
9640 if (SPValue)
9641 return true;
9642 return false;
9643 };
9644 // Remove candidates with illegal stack modifying instructions
9645 llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
9646
9647 // If the sequence doesn't have enough candidates left, then we're done.
9648 if (RepeatedSequenceLocs.size() < MinRepeats)
9649 return std::nullopt;
9650 }
9651
9652 // Properties about candidate MBBs that hold for all of them.
9653 unsigned FlagsSetInAll = 0xF;
9654
9655 // Compute liveness information for each candidate, and set FlagsSetInAll.
9656 for (outliner::Candidate &C : RepeatedSequenceLocs)
9657 FlagsSetInAll &= C.Flags;
9658
9659 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
9660
9661 // Helper lambda which sets call information for every candidate.
9662 auto SetCandidateCallInfo =
9663 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
9664 for (outliner::Candidate &C : RepeatedSequenceLocs)
9665 C.setCallInfo(CallID, NumBytesForCall);
9666 };
9667
9668 unsigned FrameID = MachineOutlinerDefault;
9669 NumBytesToCreateFrame += 4;
9670
9671 bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
9672 return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
9673 });
9674
9675 // We check to see if CFI Instructions are present, and if they are
9676 // we find the number of CFI Instructions in the candidates.
9677 unsigned CFICount = 0;
9678 for (auto &I : RepeatedSequenceLocs[0]) {
9679 if (I.isCFIInstruction())
9680 CFICount++;
9681 }
9682
9683 // We compare the number of found CFI Instructions to the number of CFI
9684 // instructions in the parent function for each candidate. We must check this
9685 // since if we outline one of the CFI instructions in a function, we have to
9686 // outline them all for correctness. If we do not, the address offsets will be
9687 // incorrect between the two sections of the program.
9688 for (outliner::Candidate &C : RepeatedSequenceLocs) {
9689 std::vector<MCCFIInstruction> CFIInstructions =
9690 C.getMF()->getFrameInstructions();
9691
9692 if (CFICount > 0 && CFICount != CFIInstructions.size())
9693 return std::nullopt;
9694 }
9695
9696 // Returns true if an instruction is safe to fix up, false otherwise.
9697 auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
9698 if (MI.isCall())
9699 return true;
9700
9701 if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
9702 !MI.readsRegister(AArch64::SP, &TRI))
9703 return true;
9704
9705 // Any modification of SP will break our code to save/restore LR.
9706 // FIXME: We could handle some instructions which add a constant
9707 // offset to SP, with a bit more work.
9708 if (MI.modifiesRegister(AArch64::SP, &TRI))
9709 return false;
9710
9711 // At this point, we have a stack instruction that we might need to
9712 // fix up. We'll handle it if it's a load or store.
9713 if (MI.mayLoadOrStore()) {
9714 const MachineOperand *Base; // Filled with the base operand of MI.
9715 int64_t Offset; // Filled with the offset of MI.
9716 bool OffsetIsScalable;
9717
9718 // Does it allow us to offset the base operand and is the base the
9719 // register SP?
9720 if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
9721 !Base->isReg() || Base->getReg() != AArch64::SP)
9722 return false;
9723
9724 // Fix-up code below assumes byte offsets.
9725 if (OffsetIsScalable)
9726 return false;
9727
9728 // Find the minimum/maximum offset for this instruction and check
9729 // if fixing it up would be in range.
9730 int64_t MinOffset,
9731 MaxOffset; // Unscaled offsets for the instruction.
9732 // The scale to multiply the offsets by.
9733 TypeSize Scale(0U, false), DummyWidth(0U, false);
9734 getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
9735
9736 Offset += 16; // Update the offset to what it would be if we outlined.
9737 if (Offset < MinOffset * (int64_t)Scale.getFixedValue() ||
9738 Offset > MaxOffset * (int64_t)Scale.getFixedValue())
9739 return false;
9740
9741 // It's in range, so we can outline it.
9742 return true;
9743 }
9744
9745 // FIXME: Add handling for instructions like "add x0, sp, #8".
9746
9747 // We can't fix it up, so don't outline it.
9748 return false;
9749 };
9750
9751 // True if it's possible to fix up each stack instruction in this sequence.
9752 // Important for frames/call variants that modify the stack.
9753 bool AllStackInstrsSafe =
9754 llvm::all_of(RepeatedSequenceLocs[0], IsSafeToFixup);
9755
9756 // If the last instruction in any candidate is a terminator, then we should
9757 // tail call all of the candidates.
9758 if (RepeatedSequenceLocs[0].back().isTerminator()) {
9759 FrameID = MachineOutlinerTailCall;
9760 NumBytesToCreateFrame = 0;
9761 unsigned NumBytesForCall = 4 + NumBytesToCheckLRInTCEpilogue;
9762 SetCandidateCallInfo(MachineOutlinerTailCall, NumBytesForCall);
9763 }
9764
9765 else if (LastInstrOpcode == AArch64::BL ||
9766 ((LastInstrOpcode == AArch64::BLR ||
9767 LastInstrOpcode == AArch64::BLRNoIP) &&
9768 !HasBTI)) {
9769 // FIXME: Do we need to check if the code after this uses the value of LR?
9770 FrameID = MachineOutlinerThunk;
9771 NumBytesToCreateFrame = NumBytesToCheckLRInTCEpilogue;
9772 SetCandidateCallInfo(MachineOutlinerThunk, 4);
9773 }
9774
9775 else {
9776 // We need to decide how to emit calls + frames. We can always emit the same
9777 // frame if we don't need to save to the stack. If we have to save to the
9778 // stack, then we need a different frame.
9779 unsigned NumBytesNoStackCalls = 0;
9780 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
9781
9782 // Check if we have to save LR.
9783 for (outliner::Candidate &C : RepeatedSequenceLocs) {
9784 bool LRAvailable =
9786 ? C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI)
9787 : true;
9788 // If we have a noreturn caller, then we're going to be conservative and
9789 // say that we have to save LR. If we don't have a ret at the end of the
9790 // block, then we can't reason about liveness accurately.
9791 //
9792 // FIXME: We can probably do better than always disabling this in
9793 // noreturn functions by fixing up the liveness info.
9794 bool IsNoReturn =
9795 C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
9796
9797 // Is LR available? If so, we don't need a save.
9798 if (LRAvailable && !IsNoReturn) {
9799 NumBytesNoStackCalls += 4;
9800 C.setCallInfo(MachineOutlinerNoLRSave, 4);
9801 CandidatesWithoutStackFixups.push_back(C);
9802 }
9803
9804 // Is an unused register available? If so, we won't modify the stack, so
9805 // we can outline with the same frame type as those that don't save LR.
9806 else if (findRegisterToSaveLRTo(C)) {
9807 NumBytesNoStackCalls += 12;
9808 C.setCallInfo(MachineOutlinerRegSave, 12);
9809 CandidatesWithoutStackFixups.push_back(C);
9810 }
9811
9812 // Is SP used in the sequence at all? If not, we don't have to modify
9813 // the stack, so we are guaranteed to get the same frame.
9814 else if (C.isAvailableInsideSeq(AArch64::SP, TRI)) {
9815 NumBytesNoStackCalls += 12;
9816 C.setCallInfo(MachineOutlinerDefault, 12);
9817 CandidatesWithoutStackFixups.push_back(C);
9818 }
9819
9820 // If we outline this, we need to modify the stack. Pretend we don't
9821 // outline this by saving all of its bytes.
9822 else {
9823 NumBytesNoStackCalls += SequenceSize;
9824 }
9825 }
9826
9827 // If there are no places where we have to save LR, then note that we
9828 // don't have to update the stack. Otherwise, give every candidate the
9829 // default call type, as long as it's safe to do so.
9830 if (!AllStackInstrsSafe ||
9831 NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
9832 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
9833 FrameID = MachineOutlinerNoLRSave;
9834 if (RepeatedSequenceLocs.size() < MinRepeats)
9835 return std::nullopt;
9836 } else {
9837 SetCandidateCallInfo(MachineOutlinerDefault, 12);
9838
9839 // Bugzilla ID: 46767
9840 // TODO: Check if fixing up the stack more than once is safe so we can
9841 // outline these.
9842 //
9843 // An outline resulting in a caller that requires stack fixups at the
9844 // callsite to a callee that also requires stack fixups can happen when
9845 // there are no available registers at the candidate callsite for a
9846 // candidate that itself also has calls.
9847 //
9848 // In other words, if function_containing_sequence in the following pseudo
9849 // assembly requires that we save LR at the point of the call, but there
9850 // are no available registers, we save using SP, and as a result the SP
9851 // offsets require stack fixups by multiples of 16.
9852 //
9853 // function_containing_sequence:
9854 // ...
9855 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
9856 // call OUTLINED_FUNCTION_N
9857 // restore LR from SP
9858 // ...
9859 //
9860 // OUTLINED_FUNCTION_N:
9861 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
9862 // ...
9863 // bl foo
9864 // restore LR from SP
9865 // ret
9866 //
9867 // Because the code to handle more than one stack fixup does not
9868 // currently have the proper checks for legality, these cases will assert
9869 // in the AArch64 MachineOutliner. This is because the code to do this
9870 // needs more hardening, testing, better checks that generated code is
9871 // legal, etc., and because it is only verified to handle a single pass of
9872 // stack fixup.
9873 //
9874 // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
9875 // these cases until they are known to be handled. Bugzilla 46767 is
9876 // referenced in comments at the assert site.
9877 //
9878 // To avoid asserting (or generating non-legal code on noassert builds)
9879 // we remove all candidates which would need more than one stack fixup by
9880 // pruning the cases where the candidate has calls while also having no
9881 // available LR and having no available general purpose registers to copy
9882 // LR to (i.e. one extra stack save/restore).
9883 //
9884 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
9885 erase_if(RepeatedSequenceLocs, [this, &TRI](outliner::Candidate &C) {
9886 auto IsCall = [](const MachineInstr &MI) { return MI.isCall(); };
9887 return (llvm::any_of(C, IsCall)) &&
9888 (!C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) ||
9889 !findRegisterToSaveLRTo(C));
9890 });
9891 }
9892 }
9893
9894 // If we dropped all of the candidates, bail out here.
9895 if (RepeatedSequenceLocs.size() < MinRepeats)
9896 return std::nullopt;
9897 }
9898
9899 // Does every candidate's MBB contain a call? If so, then we might have a call
9900 // in the range.
9901 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
9902 // Check if the range contains a call. These require a save + restore of the
9903 // link register.
9904 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
9905 bool ModStackToSaveLR = false;
9906 if (any_of(drop_end(FirstCand),
9907 [](const MachineInstr &MI) { return MI.isCall(); }))
9908 ModStackToSaveLR = true;
9909
9910 // Handle the last instruction separately. If this is a tail call, then the
9911 // last instruction is a call. We don't want to save + restore in this case.
9912 // However, it is possible that the last instruction is a call without it
9913 // being valid to tail call this sequence. We should consider this case as
9914 // well.
9915 else if (FrameID != MachineOutlinerThunk &&
9916 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
9917 ModStackToSaveLR = true;
9918
9919 if (ModStackToSaveLR) {
9920 // We can't fix up the stack. Bail out.
9921 if (!AllStackInstrsSafe)
9922 return std::nullopt;
9923
9924 // Save + restore LR.
9925 NumBytesToCreateFrame += 8;
9926 }
9927 }
9928
9929 // If we have CFI instructions, we can only outline if the outlined section
9930 // can be a tail call
9931 if (FrameID != MachineOutlinerTailCall && CFICount > 0)
9932 return std::nullopt;
9933
9934 return std::make_unique<outliner::OutlinedFunction>(
9935 RepeatedSequenceLocs, SequenceSize, NumBytesToCreateFrame, FrameID);
9936}
9937
9938void AArch64InstrInfo::mergeOutliningCandidateAttributes(
9939 Function &F, std::vector<outliner::Candidate> &Candidates) const {
9940 // If a bunch of candidates reach this point they must agree on their return
9941 // address signing. It is therefore enough to just consider the signing
9942 // behaviour of one of them
9943 const auto &CFn = Candidates.front().getMF()->getFunction();
9944
9945 if (CFn.hasFnAttribute("ptrauth-returns"))
9946 F.addFnAttr(CFn.getFnAttribute("ptrauth-returns"));
9947 if (CFn.hasFnAttribute("ptrauth-auth-traps"))
9948 F.addFnAttr(CFn.getFnAttribute("ptrauth-auth-traps"));
9949 // Since all candidates belong to the same module, just copy the
9950 // function-level attributes of an arbitrary function.
9951 if (CFn.hasFnAttribute("sign-return-address"))
9952 F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
9953 if (CFn.hasFnAttribute("sign-return-address-key"))
9954 F.addFnAttr(CFn.getFnAttribute("sign-return-address-key"));
9955
9956 AArch64GenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
9957}
9958
9959bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
9960 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
9961 const Function &F = MF.getFunction();
9962
9963 // Can F be deduplicated by the linker? If it can, don't outline from it.
9964 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
9965 return false;
9966
9967 // Don't outline from functions with section markings; the program could
9968 // expect that all the code is in the named section.
9969 // FIXME: Allow outlining from multiple functions with the same section
9970 // marking.
9971 if (F.hasSection())
9972 return false;
9973
9974 // Outlining from functions with redzones is unsafe since the outliner may
9975 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
9976 // outline from it.
9977 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
9978 if (!AFI || AFI->hasRedZone().value_or(true))
9979 return false;
9980
9981 // FIXME: Determine whether it is safe to outline from functions which contain
9982 // streaming-mode changes. We may need to ensure any smstart/smstop pairs are
9983 // outlined together and ensure it is safe to outline with async unwind info,
9984 // required for saving & restoring VG around calls.
9985 if (AFI->hasStreamingModeChanges())
9986 return false;
9987
9988 // FIXME: Teach the outliner to generate/handle Windows unwind info.
9990 return false;
9991
9992 // It's safe to outline from MF.
9993 return true;
9994}
9995
9997AArch64InstrInfo::getOutlinableRanges(MachineBasicBlock &MBB,
9998 unsigned &Flags) const {
10000 "Must track liveness!");
10002 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
10003 Ranges;
10004 // According to the AArch64 Procedure Call Standard, the following are
10005 // undefined on entry/exit from a function call:
10006 //
10007 // * Registers x16, x17, (and thus w16, w17)
10008 // * Condition codes (and thus the NZCV register)
10009 //
10010 // If any of these registers are used inside or live across an outlined
10011 // function, then they may be modified later, either by the compiler or
10012 // some other tool (like the linker).
10013 //
10014 // To avoid outlining in these situations, partition each block into ranges
10015 // where these registers are dead. We will only outline from those ranges.
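  // For example (illustrative block, not taken from a test):
  //   ...              ; x16, x17 and NZCV dead -> outlinable range
  //   cmp x0, #0       ; NZCV becomes live here
  //   b.eq <target>    ; ...until its use by the branch
  // Only the instructions above the flag-setting compare can form a
  // candidate range.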
10016 LiveRegUnits LRU(getRegisterInfo());
10017 auto AreAllUnsafeRegsDead = [&LRU]() {
10018 return LRU.available(AArch64::W16) && LRU.available(AArch64::W17) &&
10019 LRU.available(AArch64::NZCV);
10020 };
10021
10022 // We need to know if LR is live across an outlining boundary later on in
10023 // order to decide how we'll create the outlined call, frame, etc.
10024 //
10025 // It's pretty expensive to check this for *every candidate* within a block.
10026 // That's some potentially n^2 behaviour, since in the worst case, we'd need
10027 // to compute liveness from the end of the block for O(n) candidates within
10028 // the block.
10029 //
10030 // So, to improve the average case, let's keep track of liveness from the end
10031 // of the block to the beginning of *every outlinable range*. If we know that
10032 // LR is available in every range we could outline from, then we know that
10033 // we don't need to check liveness for any candidate within that range.
10034 bool LRAvailableEverywhere = true;
10035 // Compute liveness bottom-up.
10036 LRU.addLiveOuts(MBB);
10037 // Update flags that require info about the entire MBB.
10038 auto UpdateWholeMBBFlags = [&Flags](const MachineInstr &MI) {
10039 if (MI.isCall() && !MI.isTerminator())
10041 };
10042 // Range: [RangeBegin, RangeEnd)
10043 MachineBasicBlock::instr_iterator RangeBegin, RangeEnd;
10044 unsigned RangeLen;
10045 auto CreateNewRangeStartingAt =
10046 [&RangeBegin, &RangeEnd,
10047 &RangeLen](MachineBasicBlock::instr_iterator NewBegin) {
10048 RangeBegin = NewBegin;
10049 RangeEnd = std::next(RangeBegin);
10050 RangeLen = 0;
10051 };
10052 auto SaveRangeIfNonEmpty = [&RangeLen, &Ranges, &RangeBegin, &RangeEnd]() {
10053 // At least one unsafe register is not dead. We do not want to outline at
10054 // this point. If it is long enough to outline from and does not cross a
10055 // bundle boundary, save the range [RangeBegin, RangeEnd).
10056 if (RangeLen <= 1)
10057 return;
10058 if (!RangeBegin.isEnd() && RangeBegin->isBundledWithPred())
10059 return;
10060 if (!RangeEnd.isEnd() && RangeEnd->isBundledWithPred())
10061 return;
10062 Ranges.emplace_back(RangeBegin, RangeEnd);
10063 };
10064 // Find the first point where all unsafe registers are dead.
10065 // FIND: <safe instr> <-- end of first potential range
10066 // SKIP: <unsafe def>
10067 // SKIP: ... everything between ...
10068 // SKIP: <unsafe use>
10069 auto FirstPossibleEndPt = MBB.instr_rbegin();
10070 for (; FirstPossibleEndPt != MBB.instr_rend(); ++FirstPossibleEndPt) {
10071 LRU.stepBackward(*FirstPossibleEndPt);
10072 // Update flags that impact how we outline across the entire block,
10073 // regardless of safety.
10074 UpdateWholeMBBFlags(*FirstPossibleEndPt);
10075 if (AreAllUnsafeRegsDead())
10076 break;
10077 }
10078 // If we exhausted the entire block, we have no safe ranges to outline.
10079 if (FirstPossibleEndPt == MBB.instr_rend())
10080 return Ranges;
10081 // Current range.
10082 CreateNewRangeStartingAt(FirstPossibleEndPt->getIterator());
10083 // StartPt points to the first place where all unsafe registers
10084 // are dead (if there is any such point). Begin partitioning the MBB into
10085 // ranges.
10086 for (auto &MI : make_range(FirstPossibleEndPt, MBB.instr_rend())) {
10087 LRU.stepBackward(MI);
10088 UpdateWholeMBBFlags(MI);
10089 if (!AreAllUnsafeRegsDead()) {
10090 SaveRangeIfNonEmpty();
10091 CreateNewRangeStartingAt(MI.getIterator());
10092 continue;
10093 }
10094 LRAvailableEverywhere &= LRU.available(AArch64::LR);
10095 RangeBegin = MI.getIterator();
10096 ++RangeLen;
10097 }
10098 // The loop above misses the last (or only) range. If we are still safe,
10099 // save that range.
10100 if (AreAllUnsafeRegsDead())
10101 SaveRangeIfNonEmpty();
10102 if (Ranges.empty())
10103 return Ranges;
10104 // We found the ranges bottom-up; the mapping expects them top-down, so
10105 // reverse the order.
10106 std::reverse(Ranges.begin(), Ranges.end());
10107 // If there is at least one outlinable range where LR is unavailable
10108 // somewhere, remember that.
10109 if (!LRAvailableEverywhere)
10111 return Ranges;
10112}
10113
10115AArch64InstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
10117 unsigned Flags) const {
10118 MachineInstr &MI = *MIT;
10119
10120 // Don't outline anything used for return address signing. The outlined
10121 // function will get signed later if needed
10122 switch (MI.getOpcode()) {
10123 case AArch64::PACM:
10124 case AArch64::PACIASP:
10125 case AArch64::PACIBSP:
10126 case AArch64::PACIASPPC:
10127 case AArch64::PACIBSPPC:
10128 case AArch64::AUTIASP:
10129 case AArch64::AUTIBSP:
10130 case AArch64::AUTIASPPCi:
10131 case AArch64::AUTIASPPCr:
10132 case AArch64::AUTIBSPPCi:
10133 case AArch64::AUTIBSPPCr:
10134 case AArch64::RETAA:
10135 case AArch64::RETAB:
10136 case AArch64::RETAASPPCi:
10137 case AArch64::RETAASPPCr:
10138 case AArch64::RETABSPPCi:
10139 case AArch64::RETABSPPCr:
10140 case AArch64::EMITBKEY:
10141 case AArch64::PAUTH_PROLOGUE:
10142 case AArch64::PAUTH_EPILOGUE:
10144 }
10145
10146 // We can only outline these if we will tail call the outlined function, or
10147 // fix up the CFI offsets. Currently, CFI instructions are outlined only if
10148 // in a tail call.
10149 //
10150 // FIXME: If the proper fixups for the offset are implemented, this should be
10151 // possible.
10152 if (MI.isCFIInstruction())
10154
10155 // Is this a terminator for a basic block?
10156 if (MI.isTerminator())
10157 // TargetInstrInfo::getOutliningType has already filtered out anything
10158 // that would break this, so we can allow it here.
10160
10161 // Make sure none of the operands are un-outlinable.
10162 for (const MachineOperand &MOP : MI.operands()) {
10163 // A check preventing CFI indices was here before, but only CFI
10164 // instructions should have those.
10165 assert(!MOP.isCFIIndex());
10166
10167 // If it uses LR or W30 explicitly, then don't touch it.
10168 if (MOP.isReg() && !MOP.isImplicit() &&
10169 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
10171 }
10172
10173 // Special cases for instructions that can always be outlined but would
10174 // fail the later tests, e.g. ADRP, which is PC-relative but can always be
10175 // outlined because it doesn't require a *specific* value to be in LR.
10176 if (MI.getOpcode() == AArch64::ADRP)
10178
10179 // If MI is a call we might be able to outline it. We don't want to outline
10180 // any calls that rely on the position of items on the stack. When we outline
10181 // something containing a call, we have to emit a save and restore of LR in
10182 // the outlined function. Currently, this always happens by saving LR to the
10183 // stack. Thus, if we outline, say, half the parameters for a function call
10184 // plus the call, then we'll break the callee's expectations for the layout
10185 // of the stack.
10186 //
10187 // FIXME: Allow calls to functions which construct a stack frame, as long
10188 // as they don't access arguments on the stack.
10189 // FIXME: Figure out some way to analyze functions defined in other modules.
10190 // We should be able to compute the memory usage based on the IR calling
10191 // convention, even if we can't see the definition.
10192 if (MI.isCall()) {
10193 // Get the function associated with the call. Look at each operand and find
10194 // the one that represents the callee and get its name.
10195 const Function *Callee = nullptr;
10196 for (const MachineOperand &MOP : MI.operands()) {
10197 if (MOP.isGlobal()) {
10198 Callee = dyn_cast<Function>(MOP.getGlobal());
10199 break;
10200 }
10201 }
10202
10203 // Never outline calls to mcount. There isn't any rule that would require
10204 // this, but the Linux kernel's "ftrace" feature depends on it.
10205 if (Callee && Callee->getName() == "\01_mcount")
10207
10208 // If we don't know anything about the callee, assume it depends on the
10209 // stack layout of the caller. In that case, it's only legal to outline
10210 // as a tail-call. Explicitly list the call instructions we know about so we
10211 // don't get unexpected results with call pseudo-instructions.
10212 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
10213 if (MI.getOpcode() == AArch64::BLR ||
10214 MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
10215 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
10216
10217 if (!Callee)
10218 return UnknownCallOutlineType;
10219
10220 // We have a function we have information about. Check if it's something
10221 // we can safely outline.
10222 MachineFunction *CalleeMF = MMI.getMachineFunction(*Callee);
10223
10224 // We don't know what's going on with the callee at all. Don't touch it.
10225 if (!CalleeMF)
10226 return UnknownCallOutlineType;
10227
10228 // Check if we know anything about the callee saves on the function. If we
10229 // don't, then don't touch it, since that implies that we haven't
10230 // computed anything about its stack frame yet.
10231 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
10232 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
10233 MFI.getNumObjects() > 0)
10234 return UnknownCallOutlineType;
10235
10236 // At this point, we can say that CalleeMF ought to not pass anything on the
10237 // stack. Therefore, we can outline it.
10239 }
10240
10241 // Don't touch the link register or W30.
10242 if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
10243 MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
10245
10246 // Don't outline BTI instructions, because that will prevent the outlining
10247 // site from being indirectly callable.
10248 if (hasBTISemantics(MI))
10250
10252}
10253
10254void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
10255 for (MachineInstr &MI : MBB) {
10256 const MachineOperand *Base;
10257 TypeSize Width(0, false);
10258 int64_t Offset;
10259 bool OffsetIsScalable;
10260
10261 // Is this a load or store with an immediate offset with SP as the base?
10262 if (!MI.mayLoadOrStore() ||
10263 !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
10264 &RI) ||
10265 (Base->isReg() && Base->getReg() != AArch64::SP))
10266 continue;
10267
10268 // It is, so we have to fix it up.
10269 TypeSize Scale(0U, false);
10270 int64_t Dummy1, Dummy2;
10271
10272 MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
10273 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
10274 getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
10275 assert(Scale != 0 && "Unexpected opcode!");
10276 assert(!OffsetIsScalable && "Expected offset to be a byte offset");
10277
10278 // We've pushed the return address to the stack, so add 16 to the offset.
10279 // This is safe, since we already checked if it would overflow when we
10280 // checked if this instruction was legal to outline.
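    // Worked example (illustrative values): an outlined "ldr x0, [sp, #8]"
    // has Scale = 8 and Offset = 8, so NewImm = (8 + 16) / 8 = 3 and the
    // instruction is rewritten as "ldr x0, [sp, #24]", skipping the slot
    // that now holds the saved LR.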
10281 int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedValue();
10282 StackOffsetOperand.setImm(NewImm);
10283 }
10284}
10285
10287 const AArch64InstrInfo *TII,
10288 bool ShouldSignReturnAddr) {
10289 if (!ShouldSignReturnAddr)
10290 return;
10291
10292 BuildMI(MBB, MBB.begin(), DebugLoc(), TII->get(AArch64::PAUTH_PROLOGUE))
10294 BuildMI(MBB, MBB.getFirstInstrTerminator(), DebugLoc(),
10295 TII->get(AArch64::PAUTH_EPILOGUE))
10297}
10298
10299void AArch64InstrInfo::buildOutlinedFrame(
10301 const outliner::OutlinedFunction &OF) const {
10302
10303 AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>();
10304
10305 if (OF.FrameConstructionID == MachineOutlinerTailCall)
10306 FI->setOutliningStyle("Tail Call");
10307 else if (OF.FrameConstructionID == MachineOutlinerThunk) {
10308 // For thunk outlining, rewrite the last instruction from a call to a
10309 // tail-call.
10310 MachineInstr *Call = &*--MBB.instr_end();
10311 unsigned TailOpcode;
10312 if (Call->getOpcode() == AArch64::BL) {
10313 TailOpcode = AArch64::TCRETURNdi;
10314 } else {
10315 assert(Call->getOpcode() == AArch64::BLR ||
10316 Call->getOpcode() == AArch64::BLRNoIP);
10317 TailOpcode = AArch64::TCRETURNriALL;
10318 }
10319 MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
10320 .add(Call->getOperand(0))
10321 .addImm(0);
10322 MBB.insert(MBB.end(), TC);
10324
10325 FI->setOutliningStyle("Thunk");
10326 }
10327
10328 bool IsLeafFunction = true;
10329
10330 // Is there a call in the outlined range?
10331 auto IsNonTailCall = [](const MachineInstr &MI) {
10332 return MI.isCall() && !MI.isReturn();
10333 };
10334
10335 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
10336 // Fix up the instructions in the range, since we're going to modify the
10337 // stack.
10338
10339 // Bugzilla ID: 46767
10340 // TODO: Check if fixing up twice is safe so we can outline these.
10341 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
10342 "Can only fix up stack references once");
10343 fixupPostOutline(MBB);
10344
10345 IsLeafFunction = false;
10346
10347 // LR has to be a live in so that we can save it.
10348 if (!MBB.isLiveIn(AArch64::LR))
10349 MBB.addLiveIn(AArch64::LR);
10350
10353
10354 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
10355 OF.FrameConstructionID == MachineOutlinerThunk)
10356 Et = std::prev(MBB.end());
10357
10358 // Insert a save before the outlined region
10359 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
10360 .addReg(AArch64::SP, RegState::Define)
10361 .addReg(AArch64::LR)
10362 .addReg(AArch64::SP)
10363 .addImm(-16);
10364 It = MBB.insert(It, STRXpre);
10365
10366 if (MF.getInfo<AArch64FunctionInfo>()->needsDwarfUnwindInfo(MF)) {
10367 CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameSetup);
10368
10369 // Add a CFI saying the stack was moved 16 B down.
10370 CFIBuilder.buildDefCFAOffset(16);
10371
10372 // Add a CFI saying that the LR that we want to find is now 16 B higher
10373 // than before.
10374 CFIBuilder.buildOffset(AArch64::LR, -16);
10375 }
10376
10377 // Insert a restore before the terminator for the function.
10378 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
10379 .addReg(AArch64::SP, RegState::Define)
10380 .addReg(AArch64::LR, RegState::Define)
10381 .addReg(AArch64::SP)
10382 .addImm(16);
10383 Et = MBB.insert(Et, LDRXpost);
10384 }
10385
10386 bool ShouldSignReturnAddr = FI->shouldSignReturnAddress(!IsLeafFunction);
10387
10388 // If this is a tail call outlined function, then there's already a return.
10389 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
10390 OF.FrameConstructionID == MachineOutlinerThunk) {
10391 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
10392 return;
10393 }
10394
10395 // It's not a tail call, so we have to insert the return ourselves.
10396
10397 // LR has to be a live in so that we can return to it.
10398 if (!MBB.isLiveIn(AArch64::LR))
10399 MBB.addLiveIn(AArch64::LR);
10400
10401 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
10402 .addReg(AArch64::LR);
10403 MBB.insert(MBB.end(), ret);
10404
10405 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
10406
10407 FI->setOutliningStyle("Function");
10408
10409 // Did we have to modify the stack by saving the link register?
10410 if (OF.FrameConstructionID != MachineOutlinerDefault)
10411 return;
10412
10413 // We modified the stack.
10414 // Walk over the basic block and fix up all the stack accesses.
10415 fixupPostOutline(MBB);
10416}
10417
10418MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
10421
10422 // Are we tail calling?
10423 if (C.CallConstructionID == MachineOutlinerTailCall) {
10424 // If yes, then we can just branch to the label.
10425 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
10426 .addGlobalAddress(M.getNamedValue(MF.getName()))
10427 .addImm(0));
10428 return It;
10429 }
10430
10431 // Are we saving the link register?
10432 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
10433 C.CallConstructionID == MachineOutlinerThunk) {
10434 // No, so just insert the call.
10435 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
10436 .addGlobalAddress(M.getNamedValue(MF.getName())));
10437 return It;
10438 }
10439
10440 // We want to return the spot where we inserted the call.
10442
10443 // Instructions for saving and restoring LR around the call instruction we're
10444 // going to insert.
10445 MachineInstr *Save;
10446 MachineInstr *Restore;
10447 // Can we save to a register?
10448 if (C.CallConstructionID == MachineOutlinerRegSave) {
10449 // FIXME: This logic should be sunk into a target-specific interface so that
10450 // we don't have to recompute the register.
10451 Register Reg = findRegisterToSaveLRTo(C);
10452 assert(Reg && "No callee-saved register available?");
10453
10454 // LR has to be a live in so that we can save it.
10455 if (!MBB.isLiveIn(AArch64::LR))
10456 MBB.addLiveIn(AArch64::LR);
10457
10458 // Save and restore LR from Reg.
10459 Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
10460 .addReg(AArch64::XZR)
10461 .addReg(AArch64::LR)
10462 .addImm(0);
10463 Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
10464 .addReg(AArch64::XZR)
10465 .addReg(Reg)
10466 .addImm(0);
10467 } else {
10468 // We have the default case. Save and restore from SP.
10469 Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
10470 .addReg(AArch64::SP, RegState::Define)
10471 .addReg(AArch64::LR)
10472 .addReg(AArch64::SP)
10473 .addImm(-16);
10474 Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
10475 .addReg(AArch64::SP, RegState::Define)
10476 .addReg(AArch64::LR, RegState::Define)
10477 .addReg(AArch64::SP)
10478 .addImm(16);
10479 }
10480
10481 It = MBB.insert(It, Save);
10482 It++;
10483
10484 // Insert the call.
10485 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
10486 .addGlobalAddress(M.getNamedValue(MF.getName())));
10487 CallPt = It;
10488 It++;
10489
10490 It = MBB.insert(It, Restore);
10491 return CallPt;
10492}
10493
10494bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
10495 MachineFunction &MF) const {
10496 return MF.getFunction().hasMinSize();
10497}
10498
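/// Zeroing strategies chosen below, summarized for illustration (the exact
/// expansion of the FMOVD0 pseudo is decided by later passes):
///   GPR:                   movz xN, #0
///   SVE / streaming SVE:   dup  zN.d, #0
///   NEON:                  movi vN.2d, #0
///   FPR-only fallback:     FMOVD0 on the dN sub-register (materializes +0.0)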
10499void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
10501 DebugLoc &DL,
10502 bool AllowSideEffects) const {
10503 const MachineFunction &MF = *MBB.getParent();
10504 const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
10505 const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
10506
10507 if (TRI.isGeneralPurposeRegister(MF, Reg)) {
10508 BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0);
10509 } else if (STI.isSVEorStreamingSVEAvailable()) {
10510 BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg)
10511 .addImm(0)
10512 .addImm(0);
10513 } else if (STI.isNeonAvailable()) {
10514 BuildMI(MBB, Iter, DL, get(AArch64::MOVIv2d_ns), Reg)
10515 .addImm(0);
10516 } else {
10517 // This is a streaming-compatible function without SVE. We don't have full
10518 // NEON (just FPRs), so we can use at most the first 64-bit sub-register.
10519 // Since `movi v..` would be illegal here, use `fmov d..` instead.
10520 assert(STI.hasNEON() && "Expected to have NEON.");
10521 Register Reg64 = TRI.getSubReg(Reg, AArch64::dsub);
10522 BuildMI(MBB, Iter, DL, get(AArch64::FMOVD0), Reg64);
10523 }
10524}
10525
10526std::optional<DestSourcePair>
10528
10529 // AArch64::ORRWrs and AArch64::ORRXrs with a WZR/XZR source register
10530 // and a zero shift immediate are used as aliases for the mov instruction.
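  // For instance (illustrative): "orr w0, wzr, w1" and "orr x0, xzr, x2"
  // (with a zero shift) are both plain register moves, so they can be
  // reported as copies from w1 / x2 respectively.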
10531 if (((MI.getOpcode() == AArch64::ORRWrs &&
10532 MI.getOperand(1).getReg() == AArch64::WZR &&
10533 MI.getOperand(3).getImm() == 0x0) ||
10534 (MI.getOpcode() == AArch64::ORRWrr &&
10535 MI.getOperand(1).getReg() == AArch64::WZR)) &&
10536 // Check that the w->w move is not a zero-extending w->x mov.
10537 (!MI.getOperand(0).getReg().isVirtual() ||
10538 MI.getOperand(0).getSubReg() == 0) &&
10539 (!MI.getOperand(0).getReg().isPhysical() ||
10540 MI.findRegisterDefOperandIdx(getXRegFromWReg(MI.getOperand(0).getReg()),
10541 /*TRI=*/nullptr) == -1))
10542 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10543
10544 if (MI.getOpcode() == AArch64::ORRXrs &&
10545 MI.getOperand(1).getReg() == AArch64::XZR &&
10546 MI.getOperand(3).getImm() == 0x0)
10547 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10548
10549 return std::nullopt;
10550}
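// Worked example: "orr w0, wzr, w1" (ORRWrs with WZR and a zero shift) is
// the canonical encoding of "mov w0, w1", so {w0, w1} is reported as the
// destination/source pair above; the same applies to ORRXrs with XZR.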
10551
10552std::optional<DestSourcePair>
10553AArch64InstrInfo::isCopyLikeInstrImpl(const MachineInstr &MI) const {
10554 if ((MI.getOpcode() == AArch64::ORRWrs &&
10555 MI.getOperand(1).getReg() == AArch64::WZR &&
10556 MI.getOperand(3).getImm() == 0x0) ||
10557 (MI.getOpcode() == AArch64::ORRWrr &&
10558 MI.getOperand(1).getReg() == AArch64::WZR))
10559 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10560 return std::nullopt;
10561}
10562
10563std::optional<RegImmPair>
10564AArch64InstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const {
10565 int Sign = 1;
10566 int64_t Offset = 0;
10567
10568 // TODO: Handle cases where Reg is a super- or sub-register of the
10569 // destination register.
10570 const MachineOperand &Op0 = MI.getOperand(0);
10571 if (!Op0.isReg() || Reg != Op0.getReg())
10572 return std::nullopt;
10573
10574 switch (MI.getOpcode()) {
10575 default:
10576 return std::nullopt;
10577 case AArch64::SUBWri:
10578 case AArch64::SUBXri:
10579 case AArch64::SUBSWri:
10580 case AArch64::SUBSXri:
10581 Sign *= -1;
10582 [[fallthrough]];
10583 case AArch64::ADDSWri:
10584 case AArch64::ADDSXri:
10585 case AArch64::ADDWri:
10586 case AArch64::ADDXri: {
10587 // TODO: Third operand can be global address (usually some string).
10588 if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
10589 !MI.getOperand(2).isImm())
10590 return std::nullopt;
10591 int Shift = MI.getOperand(3).getImm();
10592 assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
10593 Offset = Sign * (MI.getOperand(2).getImm() << Shift);
10594 }
10595 }
10596 return RegImmPair{MI.getOperand(1).getReg(), Offset};
10597}
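// Worked example (illustrative registers): "add x0, x1, #3, lsl #12"
// (ADDXri, Shift == 12) yields RegImmPair{x1, 3 << 12} = {x1, 12288}, while
// "subs w0, w1, #16" (SUBSWri) yields {w1, -16}.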
10598
10599/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
10600/// the destination register then, if possible, describe the value in terms of
10601/// the source register.
10602static std::optional<ParamLoadedValue>
10603describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg,
10604 const TargetInstrInfo *TII,
10605 const TargetRegisterInfo *TRI) {
10606 auto DestSrc = TII->isCopyLikeInstr(MI);
10607 if (!DestSrc)
10608 return std::nullopt;
10609
10610 Register DestReg = DestSrc->Destination->getReg();
10611 Register SrcReg = DestSrc->Source->getReg();
10612
10613 auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
10614
10615 // If the described register is the destination, just return the source.
10616 if (DestReg == DescribedReg)
10617 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
10618
10619 // ORRWrs zero-extends to 64-bits, so we need to consider such cases.
10620 if (MI.getOpcode() == AArch64::ORRWrs &&
10621 TRI->isSuperRegister(DestReg, DescribedReg))
10622 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
10623
10624 // We may need to describe the lower part of a ORRXrs move.
10625 if (MI.getOpcode() == AArch64::ORRXrs &&
10626 TRI->isSubRegister(DestReg, DescribedReg)) {
10627 Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
10628 return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
10629 }
10630
10631 assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
10632 "Unhandled ORR[XW]rs copy case");
10633
10634 return std::nullopt;
10635}
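// Worked example: for "orr w1, wzr, w2" a query for x1 (the super-register)
// is answered with w2, relying on the implicit zero-extension of ORRWrs;
// for "orr x1, xzr, x2" a query for w1 is answered with w2 via the sub_32
// sub-register of the source.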
10636
10637bool AArch64InstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const {
10638 // Functions cannot be split to different sections on AArch64 if they have
10639 // a red zone. This is because relaxing a cross-section branch may require
10640 // incrementing the stack pointer to spill a register, which would overwrite
10641 // the red zone.
10642 if (MF.getInfo<AArch64FunctionInfo>()->hasRedZone().value_or(true))
10643 return false;
10644
10645 return TargetInstrInfo::isFunctionSafeToSplit(MF);
10646}
10647
10648bool AArch64InstrInfo::isMBBSafeToSplitToCold(
10649 const MachineBasicBlock &MBB) const {
10650 // Asm Goto blocks can contain conditional branches to goto labels, which can
10651 // get moved out of range of the branch instruction.
10652 auto isAsmGoto = [](const MachineInstr &MI) {
10653 return MI.getOpcode() == AArch64::INLINEASM_BR;
10654 };
10655 if (llvm::any_of(MBB, isAsmGoto) || MBB.isInlineAsmBrIndirectTarget())
10656 return false;
10657
10658 // Because jump tables are label-relative instead of table-relative, they all
10659 // must be in the same section or relocation fixup handling will fail.
10660
10661 // Check if MBB is a jump table target
10662 const MachineJumpTableInfo *MJTI = MBB.getParent()->getJumpTableInfo();
10663 auto containsMBB = [&MBB](const MachineJumpTableEntry &JTE) {
10664 return llvm::is_contained(JTE.MBBs, &MBB);
10665 };
10666 if (MJTI != nullptr && llvm::any_of(MJTI->getJumpTables(), containsMBB))
10667 return false;
10668
10669 // Check if MBB contains a jump table lookup
10670 for (const MachineInstr &MI : MBB) {
10671 switch (MI.getOpcode()) {
10672 case TargetOpcode::G_BRJT:
10673 case AArch64::JumpTableDest32:
10674 case AArch64::JumpTableDest16:
10675 case AArch64::JumpTableDest8:
10676 return false;
10677 default:
10678 continue;
10679 }
10680 }
10681
10682 // MBB isn't a special case, so it's safe to be split to the cold section.
10683 return true;
10684}
10685
10686std::optional<ParamLoadedValue>
10687AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
10688 Register Reg) const {
10689 const MachineFunction *MF = MI.getMF();
10690 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
10691 switch (MI.getOpcode()) {
10692 case AArch64::MOVZWi:
10693 case AArch64::MOVZXi: {
10694 // MOVZWi may be used for producing zero-extended 32-bit immediates in
10695 // 64-bit parameters, so we need to consider super-registers.
10696 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
10697 return std::nullopt;
10698
10699 if (!MI.getOperand(1).isImm())
10700 return std::nullopt;
10701 int64_t Immediate = MI.getOperand(1).getImm();
10702 int Shift = MI.getOperand(2).getImm();
10703 return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
10704 nullptr);
10705 }
10706 case AArch64::ORRWrs:
10707 case AArch64::ORRXrs:
10708 return describeORRLoadedValue(MI, Reg, this, TRI);
10709 }
10710
10711 return TargetInstrInfo::describeLoadedValue(MI, Reg);
10712}
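// Worked example: for "movz w0, #42, lsl #16" (MOVZWi) a query for w0 or x0
// is answered with the immediate 42 << 16 = 2752512; ORR-based moves are
// handled by describeORRLoadedValue above, and anything else falls back to
// the generic TargetInstrInfo implementation.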
10713
10714bool AArch64InstrInfo::isExtendLikelyToBeFolded(
10715 MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
10716 assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT ||
10717 ExtMI.getOpcode() == TargetOpcode::G_ZEXT ||
10718 ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);
10719
10720 // Anyexts are nops.
10721 if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
10722 return true;
10723
10724 Register DefReg = ExtMI.getOperand(0).getReg();
10725 if (!MRI.hasOneNonDBGUse(DefReg))
10726 return false;
10727
10728 // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
10729 // addressing mode.
10730 auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg);
10731 return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD;
10732}
10733
10734uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
10735 return get(Opc).TSFlags & AArch64::ElementSizeMask;
10736}
10737
10738bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
10739 return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
10740}
10741
10742bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
10743 return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
10744}
10745
10746unsigned int
10747AArch64InstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
10748 return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
10749}
10750
10751bool AArch64InstrInfo::isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
10752 unsigned Scale) const {
10753 if (Offset && Scale)
10754 return false;
10755
10756 // Check Reg + Imm
10757 if (!Scale) {
10758 // 9-bit signed offset
10759 if (isInt<9>(Offset))
10760 return true;
10761
10762 // 12-bit unsigned offset
10763 unsigned Shift = Log2_64(NumBytes);
10764 if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
10765 // Must be a multiple of NumBytes (NumBytes is a power of 2)
10766 (Offset >> Shift) << Shift == Offset)
10767 return true;
10768 return false;
10769 }
10770
10771 // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
10772 return Scale == 1 || (Scale > 0 && Scale == NumBytes);
10773}
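// Worked example: with NumBytes == 8 and Scale == 0, an offset of 32760 is
// legal (32760 / 8 == 4095 <= 4095 and it is a multiple of 8), an offset of
// 32764 is not (misaligned), and -260 is not (outside the 9-bit signed
// range, and scaled offsets must be positive).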
10774
10775unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
10776 if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
10777 return AArch64::BLRNoIP;
10778 else
10779 return AArch64::BLR;
10780}
10781
10782MachineBasicBlock::iterator
10783AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
10784 Register TargetReg, bool FrameSetup) const {
10785 assert(TargetReg != AArch64::SP && "New top of stack cannot already be in SP");
10786
10787 MachineBasicBlock &MBB = *MBBI->getParent();
10788 MachineFunction &MF = *MBB.getParent();
10789 const AArch64InstrInfo *TII =
10790 MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
10791 int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
10792 DebugLoc DL = MBB.findDebugLoc(MBBI);
10793
10794 MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
10795 MachineBasicBlock *LoopTestMBB =
10796 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
10797 MF.insert(MBBInsertPoint, LoopTestMBB);
10798 MachineBasicBlock *LoopBodyMBB =
10799 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
10800 MF.insert(MBBInsertPoint, LoopBodyMBB);
10801 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
10802 MF.insert(MBBInsertPoint, ExitMBB);
10803 MachineInstr::MIFlag Flags =
10804 FrameSetup ? MachineInstr::FrameSetup : MachineInstr::NoFlags;
10805
10806 // LoopTest:
10807 // SUB SP, SP, #ProbeSize
10808 emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, AArch64::SP,
10809 AArch64::SP, StackOffset::getFixed(-ProbeSize), TII, Flags);
10810
10811 // CMP SP, TargetReg
10812 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
10813 AArch64::XZR)
10814 .addReg(AArch64::SP)
10815 .addReg(TargetReg)
10816 .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
10817 .setMIFlags(Flags);
10818
10819 // B.<Cond> LoopExit
10820 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
10821 .addImm(AArch64CC::LE)
10822 .addMBB(ExitMBB)
10823 .setMIFlags(Flags);
10824
10825 // STR XZR, [SP]
10826 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui))
10827 .addReg(AArch64::XZR)
10828 .addReg(AArch64::SP)
10829 .addImm(0)
10830 .setMIFlags(Flags);
10831
10832 // B loop
10833 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B))
10834 .addMBB(LoopTestMBB)
10835 .setMIFlags(Flags);
10836
10837 // LoopExit:
10838 // MOV SP, TargetReg
10839 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::ADDXri), AArch64::SP)
10840 .addReg(TargetReg)
10841 .addImm(0)
10842 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
10843 .setMIFlags(Flags);
10844
10845 // LDR XZR, [SP]
10846 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::LDRXui))
10847 .addReg(AArch64::XZR, RegState::Define)
10848 .addReg(AArch64::SP)
10849 .addImm(0)
10850 .setMIFlags(Flags);
10851
10852 ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
10853 ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
10854
10855 LoopTestMBB->addSuccessor(ExitMBB);
10856 LoopTestMBB->addSuccessor(LoopBodyMBB);
10857 LoopBodyMBB->addSuccessor(LoopTestMBB);
10858 MBB.addSuccessor(LoopTestMBB);
10859
10860 // Update liveins.
10861 if (MF.getRegInfo().reservedRegsFrozen())
10862 fullyRecomputeLiveIns({ExitMBB, LoopBodyMBB, LoopTestMBB});
10863
10864 return ExitMBB->begin();
10865}
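// For illustration only (assuming a 4096-byte probe size): to move SP down
// by about 10 KiB the test block runs three times, the body probes twice
// (at SP-4096 and SP-8192), and the exit block then sets SP to TargetReg and
// issues one final load to probe the last page.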
10866
10867namespace {
10868class AArch64PipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
10869 MachineFunction *MF;
10870 const TargetInstrInfo *TII;
10871 const TargetRegisterInfo *TRI;
10872 MachineRegisterInfo &MRI;
10873
10874 /// The block of the loop
10875 MachineBasicBlock *LoopBB;
10876 /// The conditional branch of the loop
10877 MachineInstr *CondBranch;
10878 /// The compare instruction for loop control
10879 MachineInstr *Comp;
10880 /// The number of the operand of the loop counter value in Comp
10881 unsigned CompCounterOprNum;
10882 /// The instruction that updates the loop counter value
10883 MachineInstr *Update;
10884 /// The number of the operand of the loop counter value in Update
10885 unsigned UpdateCounterOprNum;
10886 /// The initial value of the loop counter
10887 Register Init;
10888 /// True iff Update is a predecessor of Comp
10889 bool IsUpdatePriorComp;
10890
10891 /// The normalized condition used by createTripCountGreaterCondition()
10892 SmallVector<MachineOperand, 4> Cond;
10893
10894public:
10895 AArch64PipelinerLoopInfo(MachineBasicBlock *LoopBB, MachineInstr *CondBranch,
10896 MachineInstr *Comp, unsigned CompCounterOprNum,
10897 MachineInstr *Update, unsigned UpdateCounterOprNum,
10898 Register Init, bool IsUpdatePriorComp,
10899 const SmallVectorImpl<MachineOperand> &Cond)
10900 : MF(Comp->getParent()->getParent()),
10901 TII(MF->getSubtarget().getInstrInfo()),
10902 TRI(MF->getSubtarget().getRegisterInfo()), MRI(MF->getRegInfo()),
10903 LoopBB(LoopBB), CondBranch(CondBranch), Comp(Comp),
10904 CompCounterOprNum(CompCounterOprNum), Update(Update),
10905 UpdateCounterOprNum(UpdateCounterOprNum), Init(Init),
10906 IsUpdatePriorComp(IsUpdatePriorComp), Cond(Cond.begin(), Cond.end()) {}
10907
10908 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
10909 // Ensure that the loop-control instructions are placed in stage 0.
10910 // The predecessors of Comp are considered by the caller.
10911 return MI == Comp;
10912 }
10913
10914 std::optional<bool> createTripCountGreaterCondition(
10915 int TC, MachineBasicBlock &MBB,
10916 SmallVectorImpl<MachineOperand> &CondParam) override {
10917 // A branch instruction will be inserted as "if (Cond) goto epilogue".
10918 // Cond is normalized for such use.
10919 // The predecessors of the branch are assumed to have already been inserted.
10920 CondParam = Cond;
10921 return {};
10922 }
10923
10924 void createRemainingIterationsGreaterCondition(
10925 int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
10926 DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) override;
10927
10928 void setPreheader(MachineBasicBlock *NewPreheader) override {}
10929
10930 void adjustTripCount(int TripCountAdjust) override {}
10931
10932 bool isMVEExpanderSupported() override { return true; }
10933};
10934} // namespace
10935
10936/// Clone an instruction from MI. The register of the ReplaceOprNum-th operand
10937/// is replaced by ReplaceReg. The output register is newly created.
10938/// The other operands are unchanged from MI.
10939static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum,
10940 Register ReplaceReg, MachineBasicBlock &MBB,
10941 MachineBasicBlock::iterator InsertTo) {
10942 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
10943 const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
10944 const TargetRegisterInfo *TRI =
10945 MBB.getParent()->getSubtarget().getRegisterInfo();
10946 MachineInstr *NewMI = MBB.getParent()->CloneMachineInstr(MI);
10947 Register Result = 0;
10948 for (unsigned I = 0; I < NewMI->getNumOperands(); ++I) {
10949 if (I == 0 && NewMI->getOperand(0).getReg().isVirtual()) {
10950 Result = MRI.createVirtualRegister(
10951 MRI.getRegClass(NewMI->getOperand(0).getReg()));
10952 NewMI->getOperand(I).setReg(Result);
10953 } else if (I == ReplaceOprNum) {
10954 MRI.constrainRegClass(ReplaceReg,
10955 TII->getRegClass(NewMI->getDesc(), I, TRI));
10956 NewMI->getOperand(I).setReg(ReplaceReg);
10957 }
10958 }
10959 MBB.insert(InsertTo, NewMI);
10960 return Result;
10961}
10962
10963void AArch64PipelinerLoopInfo::createRemainingIterationsGreaterCondition(
10964 int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
10965 DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) {
10966 // Create and accumulate conditions for the next TC iterations.
10967 // Example:
10968 // SUBSXrr N, counter, implicit-def $nzcv # compare instruction for the last
10969 // # iteration of the kernel
10970 //
10971 // # insert the following instructions
10972 // cond = CSINCXr 0, 0, C, implicit $nzcv
10973 // counter = ADDXri counter, 1 # clone from this->Update
10974 // SUBSXrr n, counter, implicit-def $nzcv # clone from this->Comp
10975 // cond = CSINCXr cond, cond, C, implicit $nzcv
10976 // ... (repeat TC times)
10977 // SUBSXri cond, 0, implicit-def $nzcv
10978
10979 assert(CondBranch->getOpcode() == AArch64::Bcc);
10980 // CondCode to exit the loop
10981 AArch64CC::CondCode CC =
10982 (AArch64CC::CondCode)CondBranch->getOperand(0).getImm();
10983 if (CondBranch->getOperand(1).getMBB() == LoopBB)
10984 CC = AArch64CC::getInvertedCondCode(CC);
10985
10986 // Accumulate conditions to exit the loop
10987 Register AccCond = AArch64::XZR;
10988
10989 // If CC holds, CurCond+1 is returned; otherwise CurCond is returned.
10990 auto AccumulateCond = [&](Register CurCond,
10991 AArch64CC::CondCode CC) {
10992 Register NewCond = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
10993 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::CSINCXr))
10994 .addReg(NewCond, RegState::Define)
10995 .addReg(CurCond)
10996 .addReg(CurCond)
10997 .addImm(AArch64CC::getInvertedCondCode(CC));
10998 return NewCond;
10999 };
11000
11001 if (!LastStage0Insts.empty() && LastStage0Insts[Comp]->getParent() == &MBB) {
11002 // The Update and Comp instructions for I == 0 already exist in MBB
11003 // (MBB is an unrolled kernel)
11004 Register Counter;
11005 for (int I = 0; I <= TC; ++I) {
11006 Register NextCounter;
11007 if (I != 0)
11008 NextCounter =
11009 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
11010
11011 AccCond = AccumulateCond(AccCond, CC);
11012
11013 if (I != TC) {
11014 if (I == 0) {
11015 if (Update != Comp && IsUpdatePriorComp) {
11016 Counter =
11017 LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
11018 NextCounter = cloneInstr(Update, UpdateCounterOprNum, Counter, MBB,
11019 MBB.end());
11020 } else {
11021 // We can use the already-calculated value.
11022 NextCounter = LastStage0Insts[Update]->getOperand(0).getReg();
11023 }
11024 } else if (Update != Comp) {
11025 NextCounter =
11026 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11027 }
11028 }
11029 Counter = NextCounter;
11030 }
11031 } else {
11032 Register Counter;
11033 if (LastStage0Insts.empty()) {
11034 // Use the initial counter value (this tests whether the trip count is
11035 // sufficient for the pipelined code to be executed).
11036 Counter = Init;
11037 if (IsUpdatePriorComp)
11038 Counter =
11039 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11040 } else {
11041 // MBB is an epilogue block. LastStage0Insts[Comp] is in the kernel block.
11042 Counter = LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
11043 }
11044
11045 for (int I = 0; I <= TC; ++I) {
11046 Register NextCounter;
11047 NextCounter =
11048 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
11049 AccCond = AccumulateCond(AccCond, CC);
11050 if (I != TC && Update != Comp)
11051 NextCounter =
11052 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11053 Counter = NextCounter;
11054 }
11055 }
11056
11057 // If AccCond == 0, the remainder is greater than TC.
11058 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::SUBSXri))
11059 .addReg(AArch64::XZR, RegState::Define | RegState::Dead)
11060 .addReg(AccCond)
11061 .addImm(0)
11062 .addImm(0);
11063 Cond.clear();
11064 Cond.push_back(MachineOperand::CreateImm(AArch64CC::EQ));
11065}
11066
11067static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB,
11068 Register &RegMBB, Register &RegOther) {
11069 assert(Phi.getNumOperands() == 5);
11070 if (Phi.getOperand(2).getMBB() == MBB) {
11071 RegMBB = Phi.getOperand(1).getReg();
11072 RegOther = Phi.getOperand(3).getReg();
11073 } else {
11074 assert(Phi.getOperand(4).getMBB() == MBB);
11075 RegMBB = Phi.getOperand(3).getReg();
11076 RegOther = Phi.getOperand(1).getReg();
11077 }
11078}
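// Worked example (MIR-like sketch): given "%phi = PHI %a, %bb.preheader,
// %b, %bb.loop" and MBB == %bb.loop, RegMBB is set to %b (the value coming
// from the loop) and RegOther to %a.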
11079
11080static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB) {
11081 if (!Reg.isVirtual())
11082 return false;
11083 const MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
11084 return MRI.getVRegDef(Reg)->getParent() != BB;
11085}
11086
11087/// If Reg is an induction variable, return true and set the output parameters.
11088static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB,
11089 MachineInstr *&UpdateInst,
11090 unsigned &UpdateCounterOprNum, Register &InitReg,
11091 bool &IsUpdatePriorComp) {
11092 // Example:
11093 //
11094 // Preheader:
11095 // InitReg = ...
11096 // LoopBB:
11097 // Reg0 = PHI (InitReg, Preheader), (Reg1, LoopBB)
11098 // Reg = COPY Reg0 ; COPY is ignored.
11099 // Reg1 = ADD Reg, #1; UpdateInst. Incremented by a loop invariant value.
11100 // ; Reg is the value calculated in the previous
11101 // ; iteration, so IsUpdatePriorComp == false.
11102
11103 if (LoopBB->pred_size() != 2)
11104 return false;
11105 if (!Reg.isVirtual())
11106 return false;
11107 const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
11108 UpdateInst = nullptr;
11109 UpdateCounterOprNum = 0;
11110 InitReg = 0;
11111 IsUpdatePriorComp = true;
11112 Register CurReg = Reg;
11113 while (true) {
11114 MachineInstr *Def = MRI.getVRegDef(CurReg);
11115 if (Def->getParent() != LoopBB)
11116 return false;
11117 if (Def->isCopy()) {
11118 // Ignore copy instructions unless they contain subregisters
11119 if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
11120 return false;
11121 CurReg = Def->getOperand(1).getReg();
11122 } else if (Def->isPHI()) {
11123 if (InitReg != 0)
11124 return false;
11125 if (!UpdateInst)
11126 IsUpdatePriorComp = false;
11127 extractPhiReg(*Def, LoopBB, CurReg, InitReg);
11128 } else {
11129 if (UpdateInst)
11130 return false;
11131 switch (Def->getOpcode()) {
11132 case AArch64::ADDSXri:
11133 case AArch64::ADDSWri:
11134 case AArch64::SUBSXri:
11135 case AArch64::SUBSWri:
11136 case AArch64::ADDXri:
11137 case AArch64::ADDWri:
11138 case AArch64::SUBXri:
11139 case AArch64::SUBWri:
11140 UpdateInst = Def;
11141 UpdateCounterOprNum = 1;
11142 break;
11143 case AArch64::ADDSXrr:
11144 case AArch64::ADDSWrr:
11145 case AArch64::SUBSXrr:
11146 case AArch64::SUBSWrr:
11147 case AArch64::ADDXrr:
11148 case AArch64::ADDWrr:
11149 case AArch64::SUBXrr:
11150 case AArch64::SUBWrr:
11151 UpdateInst = Def;
11152 if (isDefinedOutside(Def->getOperand(2).getReg(), LoopBB))
11153 UpdateCounterOprNum = 1;
11154 else if (isDefinedOutside(Def->getOperand(1).getReg(), LoopBB))
11155 UpdateCounterOprNum = 2;
11156 else
11157 return false;
11158 break;
11159 default:
11160 return false;
11161 }
11162 CurReg = Def->getOperand(UpdateCounterOprNum).getReg();
11163 }
11164
11165 if (!CurReg.isVirtual())
11166 return false;
11167 if (Reg == CurReg)
11168 break;
11169 }
11170
11171 if (!UpdateInst)
11172 return false;
11173
11174 return true;
11175}
11176
11177std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
11178AArch64InstrInfo::analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const {
11179 // Accept loops that meet the following conditions
11180 // * The conditional branch is BCC
11181 // * The compare instruction is ADDS/SUBS/WHILEXX
11182 // * One operand of the compare is an induction variable and the other is a
11183 // loop invariant value
11184 // * The induction variable is incremented/decremented by a single instruction
11185 // * Does not contain CALL or instructions which have unmodeled side effects
11186
11187 for (MachineInstr &MI : *LoopBB)
11188 if (MI.isCall() || MI.hasUnmodeledSideEffects())
11189 // This instruction may use NZCV, which interferes with the instruction to
11190 // be inserted for loop control.
11191 return nullptr;
11192
11193 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
11194 SmallVector<MachineOperand, 4> Cond;
11195 if (analyzeBranch(*LoopBB, TBB, FBB, Cond))
11196 return nullptr;
11197
11198 // Infinite loops are not supported
11199 if (TBB == LoopBB && FBB == LoopBB)
11200 return nullptr;
11201
11202 // Must be a conditional branch
11203 if (TBB != LoopBB && FBB == nullptr)
11204 return nullptr;
11205
11206 assert((TBB == LoopBB || FBB == LoopBB) &&
11207 "The Loop must be a single-basic-block loop");
11208
11209 MachineInstr *CondBranch = &*LoopBB->getFirstTerminator();
11210 const TargetRegisterInfo &TRI = getRegisterInfo();
11211
11212 if (CondBranch->getOpcode() != AArch64::Bcc)
11213 return nullptr;
11214
11215 // Normalization for createTripCountGreaterCondition()
11216 if (TBB == LoopBB)
11217 reverseBranchCondition(Cond);
11218
11219 MachineInstr *Comp = nullptr;
11220 unsigned CompCounterOprNum = 0;
11221 for (MachineInstr &MI : reverse(*LoopBB)) {
11222 if (MI.modifiesRegister(AArch64::NZCV, &TRI)) {
11223 // Guarantee that the compare is SUBS/ADDS/WHILEXX and that one of the
11224 // operands is a loop invariant value
11225
11226 switch (MI.getOpcode()) {
11227 case AArch64::SUBSXri:
11228 case AArch64::SUBSWri:
11229 case AArch64::ADDSXri:
11230 case AArch64::ADDSWri:
11231 Comp = &MI;
11232 CompCounterOprNum = 1;
11233 break;
11234 case AArch64::ADDSWrr:
11235 case AArch64::ADDSXrr:
11236 case AArch64::SUBSWrr:
11237 case AArch64::SUBSXrr:
11238 Comp = &MI;
11239 break;
11240 default:
11241 if (isWhileOpcode(MI.getOpcode())) {
11242 Comp = &MI;
11243 break;
11244 }
11245 return nullptr;
11246 }
11247
11248 if (CompCounterOprNum == 0) {
11249 if (isDefinedOutside(Comp->getOperand(1).getReg(), LoopBB))
11250 CompCounterOprNum = 2;
11251 else if (isDefinedOutside(Comp->getOperand(2).getReg(), LoopBB))
11252 CompCounterOprNum = 1;
11253 else
11254 return nullptr;
11255 }
11256 break;
11257 }
11258 }
11259 if (!Comp)
11260 return nullptr;
11261
11262 MachineInstr *Update = nullptr;
11263 Register Init;
11264 bool IsUpdatePriorComp;
11265 unsigned UpdateCounterOprNum;
11266 if (!getIndVarInfo(Comp->getOperand(CompCounterOprNum).getReg(), LoopBB,
11267 Update, UpdateCounterOprNum, Init, IsUpdatePriorComp))
11268 return nullptr;
11269
11270 return std::make_unique<AArch64PipelinerLoopInfo>(
11271 LoopBB, CondBranch, Comp, CompCounterOprNum, Update, UpdateCounterOprNum,
11272 Init, IsUpdatePriorComp, Cond);
11273}
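// Illustrative shape of a loop accepted above (MIR-like sketch, names are
// examples only):
//   loop:
//     %iv = PHI %init, %preheader, %iv.next, %loop
//     ...                                ; no calls or unmodeled side effects
//     %iv.next = ADDXri %iv, 1, 0        ; Update (counter operand is #1)
//     dead $xzr = SUBSXrr %iv.next, %n, implicit-def $nzcv
//                                        ; Comp, %n is loop-invariant
//     Bcc <cc>, %loop, implicit $nzcv    ; CondBranch
//     B %exit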
11274
11275/// verifyInstruction - Perform target specific instruction verification.
11276bool AArch64InstrInfo::verifyInstruction(const MachineInstr &MI,
11277 StringRef &ErrInfo) const {
11278 // Verify that immediate offsets on load/store instructions are within range.
11279 // Stack objects with an FI operand are excluded as they can be fixed up
11280 // during PEI.
11281 TypeSize Scale(0U, false), Width(0U, false);
11282 int64_t MinOffset, MaxOffset;
11283 if (getMemOpInfo(MI.getOpcode(), Scale, Width, MinOffset, MaxOffset)) {
11284 unsigned ImmIdx = getLoadStoreImmIdx(MI.getOpcode());
11285 if (MI.getOperand(ImmIdx).isImm() && !MI.getOperand(ImmIdx - 1).isFI()) {
11286 int64_t Imm = MI.getOperand(ImmIdx).getImm();
11287 if (Imm < MinOffset || Imm > MaxOffset) {
11288 ErrInfo = "Unexpected immediate on load/store instruction";
11289 return false;
11290 }
11291 }
11292 }
11293
11294 const MCInstrDesc &MCID = MI.getDesc();
11295 for (unsigned Op = 0; Op < MCID.getNumOperands(); Op++) {
11296 const MachineOperand &MO = MI.getOperand(Op);
11297 switch (MCID.operands()[Op].OperandType) {
11298 case AArch64::OPERAND_IMPLICIT_IMM_0:
11299 if (!MO.isImm() || MO.getImm() != 0) {
11300 ErrInfo = "OPERAND_IMPLICIT_IMM_0 should be 0";
11301 return false;
11302 }
11303 break;
11304 case AArch64::OPERAND_SHIFT_MSL:
11305 if (!MO.isImm() ||
11306 AArch64_AM::getShiftType(MO.getImm()) != AArch64_AM::MSL ||
11307 (AArch64_AM::getShiftValue(MO.getImm()) != 8 &&
11308 AArch64_AM::getShiftValue(MO.getImm()) != 16)) {
11309 ErrInfo = "OPERAND_SHIFT_MSL should be msl shift of 8 or 16";
11310 return false;
11311 }
11312 break;
11313 default:
11314 break;
11315 }
11316 }
11317 return true;
11318}
11319
11320#define GET_INSTRINFO_HELPERS
11321#define GET_INSTRMAP_INFO
11322#include "AArch64GenInstrInfo.inc"
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, unsigned NumRegs)
static cl::opt< unsigned > BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of Bcc instructions (DEBUG)"))
static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned MnegOpc, const TargetRegisterClass *RC)
genNeg - Helper to generate an intermediate negation of the second operand of Root
static cl::opt< unsigned > GatherOptSearchLimit("aarch64-search-limit", cl::Hidden, cl::init(2048), cl::desc("Restrict range of instructions to search for the " "machine-combiner gather pattern optimization"))
static bool getMaddPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find instructions that can be turned into madd.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr)
Find a condition code used by the instruction.
static MachineInstr * genFusedMultiplyAcc(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyAcc - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genFusedMultiplyAccNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static bool isCombineInstrCandidate64(unsigned Opc)
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg)
static bool areCFlagsAccessedBetweenInstrs(MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, const TargetRegisterInfo *TRI, const AccessKind AccessToCheck=AK_All)
True when condition flags are accessed (either by writing or reading) on the instruction trace starti...
static bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Floating-Point Support.
static bool isADDSRegImm(unsigned Opcode)
static void appendOffsetComment(int NumBytes, llvm::raw_string_ostream &Comment, StringRef RegScale={})
static unsigned sForm(MachineInstr &Instr)
Get opcode of S version of Instr.
static bool isCombineInstrSettingFlag(unsigned Opc)
static bool getFNEGPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB, MachineInstr *&UpdateInst, unsigned &UpdateCounterOprNum, Register &InitReg, bool &IsUpdatePriorComp)
If Reg is an induction variable, return true and set some parameters.
static const MachineInstrBuilder & AddSubReg(const MachineInstrBuilder &MIB, MCRegister Reg, unsigned SubIdx, unsigned State, const TargetRegisterInfo *TRI)
static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc)
static int findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr)
static bool isPostIndexLdStOpcode(unsigned Opcode)
Return true if the opcode is a post-index ld/st instruction, which really loads from base+0.
static unsigned getBranchDisplacementBits(unsigned Opc)
static cl::opt< unsigned > CBDisplacementBits("aarch64-cb-offset-bits", cl::Hidden, cl::init(9), cl::desc("Restrict range of CB instructions (DEBUG)"))
static std::optional< ParamLoadedValue > describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
If the given ORR instruction is a copy, and DescribedReg overlaps with the destination register then,...
static bool getFMULPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static void appendReadRegExpr(SmallVectorImpl< char > &Expr, unsigned RegNum)
static MachineInstr * genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR, const TargetRegisterClass *RC)
genMaddR - Generate madd instruction and combine mul and add using an extra virtual register Example ...
static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum, Register ReplaceReg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertTo)
Clone an instruction from MI.
static bool scaleOffset(unsigned Opc, int64_t &Offset)
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc)
unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale)
static MachineInstr * genFusedMultiplyIdx(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyIdx - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genIndexedMultiply(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxDupOp, unsigned MulOpc, const TargetRegisterClass *RC, MachineRegisterInfo &MRI)
Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
static bool isSUBSRegImm(unsigned Opcode)
static bool UpdateOperandRegClass(MachineInstr &Instr)
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr, int CmpValue, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > &CCUseInstrs, bool &IsInvertCC)
unsigned unscaledOffsetOpcode(unsigned Opcode)
static bool getLoadPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Search for patterns of LD instructions we can optimize.
static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI)
Check if CmpInstr can be substituted by MI.
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC)
static bool isCombineInstrCandidateFP(const MachineInstr &Inst)
static void appendLoadRegExpr(SmallVectorImpl< char > &Expr, int64_t OffsetFromDefCFA)
static void appendConstantExpr(SmallVectorImpl< char > &Expr, int64_t Constant, dwarf::LocationAtom Operation)
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI)
Return the opcode that does not set flags when possible - otherwise return the original opcode.
static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool isCombineInstrCandidate32(unsigned Opc)
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, SmallVectorImpl< MachineOperand > &Cond)
static unsigned offsetExtendOpcode(unsigned Opcode)
MachineOutlinerMBBFlags
@ LRUnavailableSomewhere
@ UnsafeRegsDead
static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register DestReg, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
static void generateGatherLanePattern(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned Pattern, unsigned NumLanes)
Generate optimized instruction sequence for gather load patterns to improve Memory-Level Parallelism ...
static bool getMiscPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find other MI combine patterns.
static bool outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1, int64_t Offset1, unsigned Opcode1, int FI2, int64_t Offset2, unsigned Opcode2)
static cl::opt< unsigned > TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"))
static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB, Register &RegMBB, Register &RegOther)
static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI, unsigned Reg, const StackOffset &Offset)
static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB)
static MachineInstr * genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC, FMAInstKind kind=FMAInstKind::Default, const Register *ReplacedAddend=nullptr)
genFusedMultiply - Generate fused multiply instructions.
static bool getGatherLanePattern(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, unsigned LoadLaneOpCode, unsigned NumLanes)
Check if the given instruction forms a gather load pattern that can be optimized for better Memory-Le...
static MachineInstr * genFusedMultiplyIdxNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static bool isCombineInstrCandidate(unsigned Opc)
static unsigned regOffsetOpcode(unsigned Opcode)
MachineOutlinerClass
Constants defining how certain sequences should be outlined.
@ MachineOutlinerTailCall
Emit a save, restore, call, and return.
@ MachineOutlinerRegSave
Emit a call and tail-call.
@ MachineOutlinerNoLRSave
Only emit a branch.
@ MachineOutlinerThunk
Emit a call and return.
@ MachineOutlinerDefault
static cl::opt< unsigned > BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26), cl::desc("Restrict range of B instructions (DEBUG)"))
static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB)
Check if AArch64::NZCV should be alive in successors of MBB.
static void emitFrameOffsetAdj(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, int64_t Offset, unsigned Opc, const TargetInstrInfo *TII, MachineInstr::MIFlag Flag, bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFAOffset, StackOffset CFAOffset, unsigned FrameReg)
static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize)
static cl::opt< unsigned > CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"))
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, unsigned *NewVReg=nullptr)
static void genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, unsigned IdxOpd1, DenseMap< Register, unsigned > &InstrIdxForVirtReg)
Do the following transformation A - (B + C) ==> (A - B) - C A - (B + C) ==> (A - C) - B.
static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB, const AArch64InstrInfo *TII, bool ShouldSignReturnAddr)
static MachineInstr * genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs)
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc, unsigned ZeroReg)
static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register SrcReg, bool IsKill, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
@ Default
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define F(x, y, z)
Definition MD5.cpp:55
#define I(x, y, z)
Definition MD5.cpp:58
Machine Check Debug Module
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
PowerPC Reduce CR logical Operation
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc=0)
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
bool shouldSignReturnAddress(const MachineFunction &MF) const
void setOutliningStyle(const std::string &Style)
std::optional< bool > hasRedZone() const
static bool isHForm(const MachineInstr &MI)
Returns whether the instruction is in H form (16 bit operands)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool hasBTISemantics(const MachineInstr &MI)
Returns whether the instruction can be compatible with non-zero BTYPE.
static bool isQForm(const MachineInstr &MI)
Returns whether the instruction is in Q form (128 bit operands)
static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width, int64_t &MinOffset, int64_t &MaxOffset)
Returns true if opcode Opc is a memory operation.
static bool isTailCallReturnInst(const MachineInstr &MI)
Returns true if MI is one of the TCRETURN* instructions.
static bool isFPRCopy(const MachineInstr &MI)
Does this instruction rename an FPR without modifying bits?
MachineInstr * emitLdStWithAddr(MachineInstr &MemI, const ExtAddrMode &AM) const override
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to an...
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
static bool isGPRCopy(const MachineInstr &MI)
Does this instruction rename a GPR without modifying bits?
static unsigned convertToFlagSettingOpc(unsigned Opc)
Return the opcode that set flags when possible.
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operator of a load/store.
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
static std::optional< unsigned > getUnscaledLdSt(unsigned Opc)
Returns the unscaled load/store for the scaled load/store opcode, if there is a corresponding unscale...
static bool hasUnscaledLdStOffset(unsigned Opc)
Return true if it has an unscaled load/store offset.
static const MachineOperand & getLdStAmountOp(const MachineInstr &MI)
Returns the shift amount operator of a load/store.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
std::optional< ExtAddrMode > getAddrModeFromMemoryOp(const MachineInstr &MemI, const TargetRegisterInfo *TRI) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &MI, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
bool analyzeBranchPredicate(MachineBasicBlock &MBB, MachineBranchPredicate &MBP, bool AllowModify) const override
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
static bool isPairableLdStInst(const MachineInstr &MI)
Return true if pairing the given load or store may be paired with another.
const AArch64RegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
static bool isPreSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed store.
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
AArch64InstrInfo(const AArch64Subtarget &STI)
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
bool getMemOperandWithOffsetWidth(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, TypeSize &Width, const TargetRegisterInfo *TRI) const
If OffsetIsScalable is set to 'true', the offset is scaled by vscale.
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isStridedAccess(const MachineInstr &MI)
Return true if the given load or store is a strided memory access.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
Detect opportunities for ldp/stp formation.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
bool isThroughputPattern(unsigned Pattern) const override
Return true when a code sequence can improve throughput.
MachineOperand & getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const
Return the immediate offset of the base register in a load/store LdSt.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, const MachineInstr &AddrI, ExtAddrMode &AM) const override
static bool isLdStPairSuppressed(const MachineInstr &MI)
Return true if pairing the given load or store is hinted to be unprofitable.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
MachineBasicBlock::iterator probedStackAlloc(MachineBasicBlock::iterator MBBI, Register TargetReg, bool FrameSetup) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
optimizeCompareInstr - Convert the instruction supplying the argument to the comparison into one that...
static unsigned getLoadStoreImmIdx(unsigned Opc)
Returns the index for the immediate for a given instruction.
static bool isGPRZero(const MachineInstr &MI)
Does this instruction set its full destination register to zero?
void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, unsigned ZeroReg, llvm::ArrayRef< unsigned > Indices) const
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2,...
CombinerObjective getCombinerObjective(unsigned Pattern) const override
static bool isFpOrNEON(Register Reg)
Returns whether the physical register is FP or NEON.
bool isAsCheapAsAMove(const MachineInstr &MI) const override
std::optional< DestSourcePair > isCopyLikeInstrImpl(const MachineInstr &MI) const override
static void suppressLdStPair(MachineInstr &MI)
Hint that pairing the given load or store is unprofitable.
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static bool isPreLd(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, const TargetRegisterInfo *TRI, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, llvm::ArrayRef< unsigned > Indices) const
bool optimizeCondBranch(MachineInstr &MI) const override
Replace csincr-branch sequence by simple conditional branch.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
bool isCandidateToMergeOrPair(const MachineInstr &MI) const
Return true if this is a load/store that can be potentially paired/merged.
MCInst getNop() const override
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operator of a load/store.
bool isReservedReg(const MachineFunction &MF, MCRegister Reg) const
const AArch64RegisterInfo * getRegisterInfo() const override
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
bool isSVEorStreamingSVEAvailable() const
Returns true if the target has access to either the full range of SVE instructions,...
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
const T & front() const
front - Get the first element.
Definition ArrayRef.h:150
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
This is an important base class in LLVM.
Definition Constant.h:43
A debug info location.
Definition DebugLoc.h:124
bool empty() const
Definition DenseMap.h:109
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:222
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:706
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
static LocationSize precise(uint64_t Value)
This class is intended to be used as a base class for asm properties and features specific to the tar...
Definition MCAsmInfo.h:64
bool usesWindowsCFI() const
Definition MCAsmInfo.h:652
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition MCDwarf.h:585
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition MCDwarf.h:627
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition MCDwarf.h:600
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition MCDwarf.h:697
MCInstBuilder & addImm(int64_t Val)
Add a new integer immediate operand.
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:33
constexpr bool isValid() const
Definition MCRegister.h:76
static constexpr unsigned NoRegister
Definition MCRegister.h:52
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1569
Set of metadata that should be preserved when using BuildMI().
bool isInlineAsmBrIndirectTarget() const
Returns true if this is the indirect dest of an INLINEASM_BR.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
reverse_instr_iterator instr_rbegin()
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
reverse_instr_iterator instr_rend()
Instructions::iterator instr_iterator
Instructions::const_iterator const_instr_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineInstrBundleIterator< MachineInstr > iterator
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
LLVM_ABI bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
void setStackID(int ObjectIdx, uint8_t ID)
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
unsigned getNumObjects() const
Return the number of objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
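A minimal sketch, not from this file, of how the MachineInstrBuilder helpers above are chained with BuildMI (listed further below); MBB, MBBI, DL and TII are assumed to be an insertion block, iterator, DebugLoc and AArch64InstrInfo pointer in scope.
// Emit 'add x0, x1, #16' and tag it as frame setup.
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri), AArch64::X0)
    .addReg(AArch64::X1)                 // source register
    .addImm(16)                          // 12-bit unsigned immediate
    .addImm(0)                           // LSL #0 on the immediate
    .setMIFlag(MachineInstr::FrameSetup);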
reverse_iterator getReverse() const
Get a reverse iterator to the same node.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
bool isCopy() const
const MachineBasicBlock * getParent() const
bool isCall(QueryType Type=AnyInBundle) const
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
LLVM_ABI uint32_t mergeFlagsWith(const MachineInstr &Other) const
Return the MIFlags which represent both MachineInstrs.
unsigned getNumOperands() const
Returns the total number of operands.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore,...
bool registerDefIsDead(Register Reg, const TargetRegisterInfo *TRI) const
Returns true if the register is dead in this machine instruction.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the informati...
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
LLVM_ABI bool isLoadFoldBarrier() const
Returns true if it is illegal to fold a load across this instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void addRegisterDefined(Register Reg, const TargetRegisterInfo *RegInfo=nullptr)
We have determined MI defines a register.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
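A hedged sketch of the per-instruction queries the MachineInstr accessors above support, e.g. when deciding whether an instruction may be moved across memory operations; MI is an assumed MachineInstr reference and the snippet is assumed to live inside a helper that returns true when motion is safe.
if (MI.isCall() || MI.hasUnmodeledSideEffects())
  return false;                          // calls and unmodeled effects block motion
if (MI.mayLoadOrStore() && MI.hasOrderedMemoryRef())
  return false;                          // ordered/volatile accesses must stay put
for (const MachineMemOperand *MMO : MI.memoperands())
  (void)MMO;                             // size/alignment checks would go here
return true;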
const std::vector< MachineJumpTableEntry > & getJumpTables() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
This class contains meta information specific to a module.
LLVM_ABI MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated to IR function F if there is one, otherwise nullptr.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
static MachineOperand CreateImm(int64_t Val)
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags ...
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
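A small sketch of operand inspection and mutation with the MachineOperand API above; MI is an assumed MachineInstr, and OldReg/NewReg are assumed Register values used only to illustrate a register-replacement loop.
for (unsigned I = 0, E = MI.getNumOperands(); I != E; ++I) {
  MachineOperand &MO = MI.getOperand(I);
  if (MO.isFI())
    continue;                            // frame indices handled elsewhere
  if (MO.isReg() && MO.getReg() == OldReg) {
    MO.setReg(NewReg);                   // redirect the use/def to NewReg
    MO.setIsKill(false);                 // liveness flags usually need clearing
  }
}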
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved...
const TargetRegisterClass * getRegClassOrNull(Register Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet.
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common sub...
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
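A hedged sketch using the MachineRegisterInfo queries above; MRI is assumed to be MF.getRegInfo(), VReg an assumed virtual Register, and the register class is an illustrative choice.
if (MachineInstr *DefMI = MRI.getUniqueVRegDef(VReg)) {
  // Exactly one definition reaches VReg; safe to inspect its opcode.
  (void)DefMI->getOpcode();
}
// Narrow the register class if the new use requires it.
MRI.constrainRegClass(VReg, &AArch64::GPR64spRegClass);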
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
MI-level patchpoint operands.
Definition StackMaps.h:77
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition StackMaps.h:105
Wrapper class representing virtual and physical registers.
Definition Register.h:19
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition Register.h:102
constexpr bool isValid() const
Definition Register.h:107
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:61
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:78
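A short sketch of the virtual/physical split captured by the Register and MCRegister wrappers above; MO is an assumed register MachineOperand.
Register Reg = MO.getReg();
if (!Reg.isValid()) {
  // NoRegister: nothing to do.
} else if (Reg.isVirtual()) {
  // Virtual registers are renamed later by the register allocator.
} else if (Reg.isPhysical()) {
  MCRegister Phys = Reg.asMCReg();       // narrow to the MC-layer view
  (void)Phys;
}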
Represents a location in source code.
Definition SMLoc.h:23
bool erase(PtrType Ptr)
Remove pointer from the set.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition SmallSet.h:133
bool empty() const
Definition SmallSet.h:168
bool erase(const T &V)
Definition SmallSet.h:199
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
MI-level stackmap operands.
Definition StackMaps.h:36
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition StackMaps.h:51
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:31
int64_t getFixed() const
Returns the fixed component of the stack.
Definition TypeSize.h:47
int64_t getScalable() const
Returns the scalable component of the stack.
Definition TypeSize.h:50
static StackOffset get(int64_t Fixed, int64_t Scalable)
Definition TypeSize.h:42
static StackOffset getScalable(int64_t Scalable)
Definition TypeSize.h:41
static StackOffset getFixed(int64_t Fixed)
Definition TypeSize.h:40
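A minimal sketch of StackOffset arithmetic for mixed fixed/scalable frame offsets; the values are illustrative and operator+ is assumed from the TypeSize.h definition.
StackOffset Off = StackOffset::getFixed(32) + StackOffset::getScalable(16);
int64_t FixedPart = Off.getFixed();        // 32 bytes
int64_t ScalablePart = Off.getScalable();  // 16 * vscale bytes
StackOffset Same = StackOffset::get(FixedPart, ScalablePart);
(void)Same;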
MI-level Statepoint operands.
Definition StackMaps.h:159
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given statepoint should emit.
Definition StackMaps.h:208
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could...
virtual std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const
Produce the expression describing the MI loading a value into the physical register Reg.
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual CombinerObjective getCombinerObjective(unsigned Pattern) const
Return the objective of a combiner pattern.
virtual bool isFunctionSafeToSplit(const MachineFunction &MF) const
Return true if the function is a viable candidate for machine function splitting.
TargetOptions Options
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Target - Wrapper for Target specific information.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:344
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:347
Value * getOperand(unsigned i) const
Definition User.h:232
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:169
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:166
self_iterator getIterator()
Definition ilist_node.h:123
A raw_ostream that writes to an std::string.
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_S
MO_S - Indicates that the bits of the symbol operand represented by MO_G0 etc are signed.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the sy...
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_ARM64EC_CALLMANGLE
MO_ARM64EC_CALLMANGLE - Operand refers to the Arm64EC-mangled version of a symbol,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_HI12
MO_HI12 - This flag indicates that a symbol operand represents the bits 13-24 of a 64-bit address,...
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
unsigned getCheckerSizeInBytes(AuthCheckMethod Method)
Returns the number of bytes added by checkAuthenticatedRegister.
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr a...
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
static unsigned getArithShiftValue(unsigned Imm)
getArithShiftValue - get the arithmetic shift value.
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of th...
static AArch64_AM::ShiftExtendType getArithExtendType(unsigned Imm)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
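A hedged sketch of the encoding helpers above; the constant is chosen to be an encodable logical immediate (a repeating 16-bit pattern), and the AArch64_AM namespace is assumed from the addressing-modes header.
uint64_t Imm = 0x00FF00FF00FF00FFULL;                       // repeating 0x00FF pattern
uint64_t Enc = AArch64_AM::encodeLogicalImmediate(Imm, 64); // "N:immr:imms" form
uint64_t Dec = AArch64_AM::decodeLogicalImmediate(Enc, 64);
assert(Dec == Imm && "logical immediate should round-trip");
// The shifter helpers pack a shift type and amount into one operand field.
unsigned Shifter = AArch64_AM::getShifterImm(AArch64_AM::LSL, 12);
assert(AArch64_AM::getShiftType(Shifter) == AArch64_AM::LSL &&
       AArch64_AM::getShiftValue(Shifter) == 12);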
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
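A small sketch of driving expandMOVImm; the AArch64_IMM namespace and the ImmInsnModel field name are assumptions based on AArch64ExpandImm.h.
SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
AArch64_IMM::expandMOVImm(0x0000FEDCBA980000ULL, 64, Insn);
// Each entry pairs a real MOVZ/MOVK/ORR-style opcode with its immediate fields.
for (const auto &I : Insn)
  (void)I.Opcode;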
static const uint64_t InstrFlagIsWhile
static const uint64_t InstrFlagIsPTestLike
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Renamable
Register that may be renamed.
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
constexpr double e
Definition MathExtras.h:47
InstrType
Represents how an instruction should be mapped by the outliner.
LLVM_ABI Instruction & back() const
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1727
static bool isCondBranchOpcode(int Opc)
MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg, unsigned Reg, const StackOffset &Offset, bool LastAdjustmentWasScalable=true)
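A hedged sketch of emitting a CFA-defining CFI instruction with createDefCFA, addFrameInst and addCFIIndex from this listing; MF, MBB, MBBI, DL and TII are assumed to be in scope and TRI is an assumed TargetRegisterInfo reference.
unsigned CFIIndex = MF.addFrameInst(createDefCFA(
    TRI, /*FrameReg=*/AArch64::SP, /*Reg=*/AArch64::SP,
    StackOffset::getFixed(16), /*LastAdjustmentWasScalable=*/false));
BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
    .addCFIIndex(CFIIndex)
    .setMIFlag(MachineInstr::FrameSetup);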
static bool isPTrueOpcode(unsigned Opc)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool succeeded(LogicalResult Result)
Utility function that returns true if the provided LogicalResult corresponds to a success value.
int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset, bool *OutUseUnscaledOp=nullptr, unsigned *OutUnscaledOp=nullptr, int64_t *EmittableOffset=nullptr)
Check if the Offset is a valid frame offset for MI.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2474
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:644
static bool isIndirectBranchOpcode(int Opc)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
@ AArch64FrameOffsetIsLegal
Offset is legal.
@ AArch64FrameOffsetCanUpdate
Offset can apply, at least partly.
@ AArch64FrameOffsetCannotUpdate
Offset cannot apply.
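A sketch combining isAArch64FrameOffsetLegal (listed earlier) with the status flags above; MI is an assumed load/store MachineInstr with a frame operand.
StackOffset Off = StackOffset::getFixed(4096);
bool UseUnscaled = false;
unsigned UnscaledOp = 0;
int Status = isAArch64FrameOffsetLegal(MI, Off, &UseUnscaled, &UnscaledOp);
if (Status & AArch64FrameOffsetCanUpdate) {
  // At least part of the offset can be folded into MI; Off now holds the rest.
  if (Status & AArch64FrameOffsetIsLegal) {
    // The whole offset fits the addressing mode.
  }
} else {
  // AArch64FrameOffsetCannotUpdate: materialize the address separately.
}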
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:293
Op::Description Desc
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:348
static bool isSEHInstruction(const MachineInstr &MI)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1734
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1624
AArch64MachineCombinerPattern
@ MULSUBv8i16_OP2
@ FMULv4i16_indexed_OP1
@ FMLSv1i32_indexed_OP2
@ MULSUBv2i32_indexed_OP1
@ FMLAv2i32_indexed_OP2
@ MULADDv4i16_indexed_OP2
@ FMLAv1i64_indexed_OP1
@ MULSUBv16i8_OP1
@ FMLAv8i16_indexed_OP2
@ FMULv2i32_indexed_OP1
@ MULSUBv8i16_indexed_OP2
@ FMLAv1i64_indexed_OP2
@ MULSUBv4i16_indexed_OP2
@ FMLAv1i32_indexed_OP1
@ FMLAv2i64_indexed_OP2
@ FMLSv8i16_indexed_OP1
@ MULSUBv2i32_OP1
@ FMULv4i16_indexed_OP2
@ MULSUBv4i32_indexed_OP2
@ FMULv2i64_indexed_OP2
@ FMLAv4i32_indexed_OP1
@ MULADDv4i16_OP2
@ FMULv8i16_indexed_OP2
@ MULSUBv4i16_OP1
@ MULADDv4i32_OP2
@ MULADDv2i32_OP2
@ MULADDv16i8_OP2
@ FMLSv4i16_indexed_OP1
@ MULADDv16i8_OP1
@ FMLAv2i64_indexed_OP1
@ FMLAv1i32_indexed_OP2
@ FMLSv2i64_indexed_OP2
@ MULADDv2i32_OP1
@ MULADDv4i32_OP1
@ MULADDv2i32_indexed_OP1
@ MULSUBv16i8_OP2
@ MULADDv4i32_indexed_OP1
@ MULADDv2i32_indexed_OP2
@ FMLAv4i16_indexed_OP2
@ MULSUBv8i16_OP1
@ FMULv2i32_indexed_OP2
@ FMLSv2i32_indexed_OP2
@ FMLSv4i32_indexed_OP1
@ FMULv2i64_indexed_OP1
@ MULSUBv4i16_OP2
@ FMLSv4i16_indexed_OP2
@ FMLAv2i32_indexed_OP1
@ FMLSv2i32_indexed_OP1
@ FMLAv8i16_indexed_OP1
@ MULSUBv4i16_indexed_OP1
@ FMLSv4i32_indexed_OP2
@ MULADDv4i32_indexed_OP2
@ MULSUBv4i32_OP2
@ MULSUBv8i16_indexed_OP1
@ MULADDv8i16_OP2
@ MULSUBv2i32_indexed_OP2
@ FMULv4i32_indexed_OP2
@ FMLSv2i64_indexed_OP1
@ MULADDv4i16_OP1
@ FMLAv4i32_indexed_OP2
@ MULADDv8i16_indexed_OP1
@ FMULv4i32_indexed_OP1
@ FMLAv4i16_indexed_OP1
@ FMULv8i16_indexed_OP1
@ MULADDv8i16_OP1
@ MULSUBv4i32_indexed_OP1
@ MULSUBv4i32_OP1
@ FMLSv8i16_indexed_OP2
@ MULADDv8i16_indexed_OP2
@ MULSUBv2i32_OP2
@ FMLSv1i64_indexed_OP2
@ MULADDv4i16_indexed_OP1
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag=MachineInstr::NoFlags, bool SetNZCV=false, bool NeedsWinCFI=false, bool *HasWinCFI=nullptr, bool EmitCFAOffset=false, StackOffset InitialOffset={}, unsigned FrameReg=AArch64::SP)
emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg plus Offset.
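A minimal sketch of emitFrameOffset per the signature above, adjusting SP by a mixed fixed and scalable amount; MBB, MBBI, DL and TII are assumed to be in scope.
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
                StackOffset::get(/*Fixed=*/-32, /*Scalable=*/-16), TII,
                MachineInstr::FrameSetup);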
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
std::optional< UsedNZCV > examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > *CCUseInstrs=nullptr)
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached,...
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:325
unsigned getUndefRegState(bool B)
static MCRegister getXRegFromWReg(MCRegister Reg)
unsigned getDefRegState(bool B)
MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg, const StackOffset &OffsetFromDefCFA, std::optional< int64_t > IncomingVGOffsetFromDefCFA)
unsigned getKillRegState(bool B)
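A short sketch showing how the register-state helpers above fold liveness flags into addReg; DestReg, SrcReg and KillSrc are assumed, and the ORRXrs form is one common way a GPR64 copy is emitted.
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), DestReg)
    .addReg(AArch64::XZR)
    .addReg(SrcReg, getKillRegState(KillSrc))
    .addImm(0);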
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
static bool isUncondBranchOpcode(int Opc)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:560
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2122
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:257
bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, StackOffset &Offset, const AArch64InstrInfo *TII)
rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the FP.
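A hedged sketch of rewriteAArch64FrameIndex per the signature above; MI, FrameRegIdx, ObjectOffset and TII are assumptions about the caller's context.
StackOffset Off = StackOffset::getFixed(ObjectOffset);
if (rewriteAArch64FrameIndex(MI, FrameRegIdx, AArch64::SP, Off, TII)) {
  // The frame index was folded into MI; Off holds any remainder the caller
  // still needs to add to the base register.
}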
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1899
static const MachineMemOperand::Flags MOSuppressPair
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:583
void appendLEB128(SmallVectorImpl< U > &Buffer, T Value)
Definition LEB128.h:238
std::pair< MachineOperand, DIExpression * > ParamLoadedValue
bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI, const MachineInstr &UseMI, const TargetRegisterInfo *TRI)
Return true if there is an instruction /after/ DefMI and before UseMI which either reads or clobbers ...
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
static const MachineMemOperand::Flags MOStridedAccess
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-in's for a set of MBBs until the computation converges.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
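A tiny sketch of debug printing with printReg and errs(); MO is an assumed register operand and TRI an assumed TargetRegisterInfo pointer.
errs() << "rewriting operand " << printReg(MO.getReg(), TRI) << "\n";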
Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare.
LLVM_ABI static const MBBSectionID ColdSectionID
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
An individual sequence of instructions to be replaced with a call to an outlined function.
MachineFunction * getMF() const
The information necessary to create an outlined function for some class of candidate.