1//===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains the AArch64 implementation of the TargetInstrInfo class.
10//
11//===----------------------------------------------------------------------===//
12
13#include "AArch64InstrInfo.h"
14#include "AArch64ExpandImm.h"
16#include "AArch64PointerAuth.h"
17#include "AArch64Subtarget.h"
21#include "llvm/ADT/ArrayRef.h"
22#include "llvm/ADT/STLExtras.h"
23#include "llvm/ADT/SmallSet.h"
43#include "llvm/IR/DebugLoc.h"
44#include "llvm/IR/GlobalValue.h"
45#include "llvm/IR/Module.h"
46#include "llvm/MC/MCAsmInfo.h"
47#include "llvm/MC/MCInst.h"
49#include "llvm/MC/MCInstrDesc.h"
54#include "llvm/Support/LEB128.h"
58#include <cassert>
59#include <cstdint>
60#include <iterator>
61#include <utility>
62
63using namespace llvm;
64
65#define GET_INSTRINFO_CTOR_DTOR
66#include "AArch64GenInstrInfo.inc"
67
68static cl::opt<unsigned>
69 CBDisplacementBits("aarch64-cb-offset-bits", cl::Hidden, cl::init(9),
70 cl::desc("Restrict range of CB instructions (DEBUG)"));
71
73 "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
74 cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
75
77 "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
78 cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
79
80static cl::opt<unsigned>
81 BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
82 cl::desc("Restrict range of Bcc instructions (DEBUG)"));
83
84static cl::opt<unsigned>
85 BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26),
86 cl::desc("Restrict range of B instructions (DEBUG)"));
87
89 "aarch64-search-limit", cl::Hidden, cl::init(2048),
90 cl::desc("Restrict range of instructions to search for the "
91 "machine-combiner gather pattern optimization"));
92
93AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
94 : AArch64GenInstrInfo(STI, RI, AArch64::ADJCALLSTACKDOWN,
95 AArch64::ADJCALLSTACKUP, AArch64::CATCHRET),
96 RI(STI.getTargetTriple(), STI.getHwMode()), Subtarget(STI) {}
97
98/// GetInstSize - Return the number of bytes of code the specified
99/// instruction may be. This returns the maximum number of bytes.
100unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
101 const MachineBasicBlock &MBB = *MI.getParent();
102 const MachineFunction *MF = MBB.getParent();
103 const Function &F = MF->getFunction();
104 const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
105
106 {
107 auto Op = MI.getOpcode();
108 if (Op == AArch64::INLINEASM || Op == AArch64::INLINEASM_BR)
109 return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
110 }
111
112 // Meta-instructions emit no code.
113 if (MI.isMetaInstruction())
114 return 0;
115
116 // FIXME: We currently only handle pseudoinstructions that don't get expanded
117 // before the assembly printer.
118 unsigned NumBytes = 0;
119 const MCInstrDesc &Desc = MI.getDesc();
120
121 if (!MI.isBundle() && isTailCallReturnInst(MI)) {
122 NumBytes = Desc.getSize() ? Desc.getSize() : 4;
123
124 const auto *MFI = MF->getInfo<AArch64FunctionInfo>();
125 if (!MFI->shouldSignReturnAddress(*MF))
126 return NumBytes;
127
128 const auto &STI = MF->getSubtarget<AArch64Subtarget>();
129 auto Method = STI.getAuthenticatedLRCheckMethod(*MF);
130 NumBytes += AArch64PAuth::getCheckerSizeInBytes(Method);
131 return NumBytes;
132 }
133
134 // The size should preferably be set in
135 // llvm/lib/Target/AArch64/AArch64InstrInfo.td (default case).
136 // The specific cases below handle instructions of variable size.
137 switch (Desc.getOpcode()) {
138 default:
139 if (Desc.getSize())
140 return Desc.getSize();
141
142 // Anything not explicitly designated otherwise (i.e. pseudo-instructions
143 // with fixed constant size but not specified in .td file) is a normal
144 // 4-byte insn.
145 NumBytes = 4;
146 break;
147 case TargetOpcode::STACKMAP:
148 // The upper bound for a stackmap intrinsic is the full length of its shadow
149 NumBytes = StackMapOpers(&MI).getNumPatchBytes();
150 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
151 break;
152 case TargetOpcode::PATCHPOINT:
153 // The size of the patchpoint intrinsic is the number of bytes requested
154 NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
155 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
156 break;
157 case TargetOpcode::STATEPOINT:
158 NumBytes = StatepointOpers(&MI).getNumPatchBytes();
159 assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
160 // No patch bytes means a normal call inst is emitted
161 if (NumBytes == 0)
162 NumBytes = 4;
163 break;
164 case TargetOpcode::PATCHABLE_FUNCTION_ENTER:
165 // If `patchable-function-entry` is set, PATCHABLE_FUNCTION_ENTER
166 // instructions are expanded to the specified number of NOPs. Otherwise,
167 // they are expanded to 36-byte XRay sleds.
168 NumBytes =
169 F.getFnAttributeAsParsedInteger("patchable-function-entry", 9) * 4;
170 break;
171 case TargetOpcode::PATCHABLE_FUNCTION_EXIT:
172 case TargetOpcode::PATCHABLE_TAIL_CALL:
173 case TargetOpcode::PATCHABLE_TYPED_EVENT_CALL:
174 // An XRay sled can be 4 bytes of alignment plus a 32-byte block.
175 NumBytes = 36;
176 break;
177 case TargetOpcode::PATCHABLE_EVENT_CALL:
178 // EVENT_CALL XRay sleds are exactly 6 instructions long (no alignment).
179 NumBytes = 24;
180 break;
181
182 case AArch64::SPACE:
183 NumBytes = MI.getOperand(1).getImm();
184 break;
185 case TargetOpcode::BUNDLE:
186 NumBytes = getInstBundleLength(MI);
187 break;
188 }
189
190 return NumBytes;
191}
192
193unsigned AArch64InstrInfo::getInstBundleLength(const MachineInstr &MI) const {
194 unsigned Size = 0;
195 MachineBasicBlock::const_instr_iterator I = MI.getIterator();
196 MachineBasicBlock::const_instr_iterator E = MI.getParent()->instr_end();
197 while (++I != E && I->isInsideBundle()) {
198 assert(!I->isBundle() && "No nested bundle!");
199 Size += getInstSizeInBytes(*I);
200 }
201 return Size;
202}
203
204static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
205 SmallVectorImpl<MachineOperand> &Cond) {
206 // Block ends with fall-through condbranch.
207 switch (LastInst->getOpcode()) {
208 default:
209 llvm_unreachable("Unknown branch instruction?");
210 case AArch64::Bcc:
211 Target = LastInst->getOperand(1).getMBB();
212 Cond.push_back(LastInst->getOperand(0));
213 break;
214 case AArch64::CBZW:
215 case AArch64::CBZX:
216 case AArch64::CBNZW:
217 case AArch64::CBNZX:
218 Target = LastInst->getOperand(1).getMBB();
219 Cond.push_back(MachineOperand::CreateImm(-1));
220 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
221 Cond.push_back(LastInst->getOperand(0));
222 break;
223 case AArch64::TBZW:
224 case AArch64::TBZX:
225 case AArch64::TBNZW:
226 case AArch64::TBNZX:
227 Target = LastInst->getOperand(2).getMBB();
228 Cond.push_back(MachineOperand::CreateImm(-1));
229 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
230 Cond.push_back(LastInst->getOperand(0));
231 Cond.push_back(LastInst->getOperand(1));
232 break;
233 case AArch64::CBWPri:
234 case AArch64::CBXPri:
235 case AArch64::CBWPrr:
236 case AArch64::CBXPrr:
237 Target = LastInst->getOperand(3).getMBB();
238 Cond.push_back(MachineOperand::CreateImm(-1));
239 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
240 Cond.push_back(LastInst->getOperand(0));
241 Cond.push_back(LastInst->getOperand(1));
242 Cond.push_back(LastInst->getOperand(2));
243 break;
244 case AArch64::CBBAssertExt:
245 case AArch64::CBHAssertExt:
246 Target = LastInst->getOperand(3).getMBB();
247 Cond.push_back(MachineOperand::CreateImm(-1)); // -1
248 Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode())); // Opc
249 Cond.push_back(LastInst->getOperand(0)); // Cond
250 Cond.push_back(LastInst->getOperand(1)); // Op0
251 Cond.push_back(LastInst->getOperand(2)); // Op1
252 Cond.push_back(LastInst->getOperand(4)); // Ext0
253 Cond.push_back(LastInst->getOperand(5)); // Ext1
254 break;
255 }
256}
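// Illustrative summary of the Cond encodings produced above, read off the
// cases in parseCondBranch (not an exhaustive specification):
//   Bcc:                  Cond = { CC }
//   CBZ/CBNZ:             Cond = { -1, Opcode, Reg }
//   TBZ/TBNZ:             Cond = { -1, Opcode, Reg, BitImm }
//   CBWPri/CBXPri/..Prr:  Cond = { -1, Opcode, CC, Op0, Op1 }
//   CBB/CBHAssertExt:     Cond = { -1, Opcode, CC, Op0, Op1, Ext0, Ext1 }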
257
258static unsigned getBranchDisplacementBits(unsigned Opc) {
259 switch (Opc) {
260 default:
261 llvm_unreachable("unexpected opcode!");
262 case AArch64::B:
263 return BDisplacementBits;
264 case AArch64::TBNZW:
265 case AArch64::TBZW:
266 case AArch64::TBNZX:
267 case AArch64::TBZX:
268 return TBZDisplacementBits;
269 case AArch64::CBNZW:
270 case AArch64::CBZW:
271 case AArch64::CBNZX:
272 case AArch64::CBZX:
273 return CBZDisplacementBits;
274 case AArch64::Bcc:
275 return BCCDisplacementBits;
276 case AArch64::CBWPri:
277 case AArch64::CBXPri:
278 case AArch64::CBBAssertExt:
279 case AArch64::CBHAssertExt:
280 case AArch64::CBWPrr:
281 case AArch64::CBXPrr:
282 return CBDisplacementBits;
283 }
284}
285
286bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
287 int64_t BrOffset) const {
288 unsigned Bits = getBranchDisplacementBits(BranchOp);
289 assert(Bits >= 3 && "max branch displacement must be enough to jump "
290 "over conditional branch expansion");
291 return isIntN(Bits, BrOffset / 4);
292}
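// Example (illustrative, using the default displacement widths above): an
// unconditional B encodes a 26-bit signed word offset, so
// isBranchOffsetInRange(AArch64::B, Off) accepts offsets up to roughly
// +/-128 MiB, while Bcc (19 bits) accepts only about +/-1 MiB.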
293
294MachineBasicBlock *
295AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
296 switch (MI.getOpcode()) {
297 default:
298 llvm_unreachable("unexpected opcode!");
299 case AArch64::B:
300 return MI.getOperand(0).getMBB();
301 case AArch64::TBZW:
302 case AArch64::TBNZW:
303 case AArch64::TBZX:
304 case AArch64::TBNZX:
305 return MI.getOperand(2).getMBB();
306 case AArch64::CBZW:
307 case AArch64::CBNZW:
308 case AArch64::CBZX:
309 case AArch64::CBNZX:
310 case AArch64::Bcc:
311 return MI.getOperand(1).getMBB();
312 case AArch64::CBWPri:
313 case AArch64::CBXPri:
314 case AArch64::CBBAssertExt:
315 case AArch64::CBHAssertExt:
316 case AArch64::CBWPrr:
317 case AArch64::CBXPrr:
318 return MI.getOperand(3).getMBB();
319 }
320}
321
322void AArch64InstrInfo::insertIndirectBranch(MachineBasicBlock &MBB,
323 MachineBasicBlock &NewDestBB,
324 MachineBasicBlock &RestoreBB,
325 const DebugLoc &DL,
326 int64_t BrOffset,
327 RegScavenger *RS) const {
328 assert(RS && "RegScavenger required for long branching");
329 assert(MBB.empty() &&
330 "new block should be inserted for expanding unconditional branch");
331 assert(MBB.pred_size() == 1);
332 assert(RestoreBB.empty() &&
333 "restore block should be inserted for restoring clobbered registers");
334
335 auto buildIndirectBranch = [&](Register Reg, MachineBasicBlock &DestBB) {
336 // Offsets outside of the signed 33-bit range are not supported for ADRP +
337 // ADD.
338 if (!isInt<33>(BrOffset))
339 report_fatal_error(
340 "Branch offsets outside of the signed 33-bit range not supported");
341
342 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADRP), Reg)
343 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGE);
344 BuildMI(MBB, MBB.end(), DL, get(AArch64::ADDXri), Reg)
345 .addReg(Reg)
346 .addSym(DestBB.getSymbol(), AArch64II::MO_PAGEOFF | AArch64II::MO_NC)
347 .addImm(0);
348 BuildMI(MBB, MBB.end(), DL, get(AArch64::BR)).addReg(Reg);
349 };
350
351 RS->enterBasicBlockEnd(MBB);
352 // If X16 is unused, we can rely on the linker to insert a range extension
353 // thunk if NewDestBB is out of range of a single B instruction.
354 constexpr Register Reg = AArch64::X16;
355 if (!RS->isRegUsed(Reg)) {
356 insertUnconditionalBranch(MBB, &NewDestBB, DL);
357 RS->setRegUsed(Reg);
358 return;
359 }
360
361 // If there's a free register and it's worth inflating the code size,
362 // manually insert the indirect branch.
363 Register Scavenged = RS->FindUnusedReg(&AArch64::GPR64RegClass);
364 if (Scavenged != AArch64::NoRegister &&
365 MBB.getSectionID() == MBBSectionID::ColdSectionID) {
366 buildIndirectBranch(Scavenged, NewDestBB);
367 RS->setRegUsed(Scavenged);
368 return;
369 }
370
371 // Note: Spilling X16 briefly moves the stack pointer, making it incompatible
372 // with red zones.
373 AArch64FunctionInfo *AFI = MBB.getParent()->getInfo<AArch64FunctionInfo>();
374 if (!AFI || AFI->hasRedZone().value_or(true))
375 report_fatal_error(
376 "Unable to insert indirect branch inside function that has red zone");
377
378 // Otherwise, spill X16 and defer range extension to the linker.
379 BuildMI(MBB, MBB.end(), DL, get(AArch64::STRXpre))
380 .addReg(AArch64::SP, RegState::Define)
381 .addReg(Reg)
382 .addReg(AArch64::SP)
383 .addImm(-16);
384
385 BuildMI(MBB, MBB.end(), DL, get(AArch64::B)).addMBB(&RestoreBB);
386
387 BuildMI(RestoreBB, RestoreBB.end(), DL, get(AArch64::LDRXpost))
388 .addReg(AArch64::SP, RegState::Define)
389 .addReg(Reg, RegState::Define)
390 .addReg(AArch64::SP)
391 .addImm(16);
392}
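// Illustrative expansion when X16 has to be spilled (see the BuildMI calls
// above): the new block becomes "str x16, [sp, #-16]!; b RestoreBB" and
// RestoreBB starts with "ldr x16, [sp], #16", with the linker expected to
// insert the actual range-extension thunk.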
393
394// Branch analysis.
395bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
396 MachineBasicBlock *&TBB,
397 MachineBasicBlock *&FBB,
398 SmallVectorImpl<MachineOperand> &Cond,
399 bool AllowModify) const {
400 // If the block has no terminators, it just falls into the block after it.
401 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
402 if (I == MBB.end())
403 return false;
404
405 // Skip over SpeculationBarrierEndBB terminators
406 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
407 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
408 --I;
409 }
410
411 if (!isUnpredicatedTerminator(*I))
412 return false;
413
414 // Get the last instruction in the block.
415 MachineInstr *LastInst = &*I;
416
417 // If there is only one terminator instruction, process it.
418 unsigned LastOpc = LastInst->getOpcode();
419 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
420 if (isUncondBranchOpcode(LastOpc)) {
421 TBB = LastInst->getOperand(0).getMBB();
422 return false;
423 }
424 if (isCondBranchOpcode(LastOpc)) {
425 // Block ends with fall-through condbranch.
426 parseCondBranch(LastInst, TBB, Cond);
427 return false;
428 }
429 return true; // Can't handle indirect branch.
430 }
431
432 // Get the instruction before it if it is a terminator.
433 MachineInstr *SecondLastInst = &*I;
434 unsigned SecondLastOpc = SecondLastInst->getOpcode();
435
436 // If AllowModify is true and the block ends with two or more unconditional
437 // branches, delete all but the first unconditional branch.
438 if (AllowModify && isUncondBranchOpcode(LastOpc)) {
439 while (isUncondBranchOpcode(SecondLastOpc)) {
440 LastInst->eraseFromParent();
441 LastInst = SecondLastInst;
442 LastOpc = LastInst->getOpcode();
443 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
444 // Return now; the only terminator is an unconditional branch.
445 TBB = LastInst->getOperand(0).getMBB();
446 return false;
447 }
448 SecondLastInst = &*I;
449 SecondLastOpc = SecondLastInst->getOpcode();
450 }
451 }
452
453 // If we're allowed to modify and the block ends in an unconditional branch
454 // which could simply fallthrough, remove the branch. (Note: This case only
455 // matters when we can't understand the whole sequence, otherwise it's also
456 // handled by BranchFolding.cpp.)
457 if (AllowModify && isUncondBranchOpcode(LastOpc) &&
458 MBB.isLayoutSuccessor(getBranchDestBlock(*LastInst))) {
459 LastInst->eraseFromParent();
460 LastInst = SecondLastInst;
461 LastOpc = LastInst->getOpcode();
462 if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
463 assert(!isUncondBranchOpcode(LastOpc) &&
464 "unreachable unconditional branches removed above");
465
466 if (isCondBranchOpcode(LastOpc)) {
467 // Block ends with fall-through condbranch.
468 parseCondBranch(LastInst, TBB, Cond);
469 return false;
470 }
471 return true; // Can't handle indirect branch.
472 }
473 SecondLastInst = &*I;
474 SecondLastOpc = SecondLastInst->getOpcode();
475 }
476
477 // If there are three terminators, we don't know what sort of block this is.
478 if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
479 return true;
480
481 // If the block ends with a B and a Bcc, handle it.
482 if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
483 parseCondBranch(SecondLastInst, TBB, Cond);
484 FBB = LastInst->getOperand(0).getMBB();
485 return false;
486 }
487
488 // If the block ends with two unconditional branches, handle it. The second
489 // one is not executed, so remove it.
490 if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
491 TBB = SecondLastInst->getOperand(0).getMBB();
492 I = LastInst;
493 if (AllowModify)
494 I->eraseFromParent();
495 return false;
496 }
497
498 // ...likewise if it ends with an indirect branch followed by an unconditional
499 // branch.
500 if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
501 I = LastInst;
502 if (AllowModify)
503 I->eraseFromParent();
504 return true;
505 }
506
507 // Otherwise, can't handle this.
508 return true;
509}
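// Illustrative analyzeBranch results, derived from the cases above:
//   "b.eq %bb.1"              -> TBB=%bb.1, FBB=null, Cond={EQ}   (fallthrough)
//   "cbz x0, %bb.1; b %bb.2"  -> TBB=%bb.1, FBB=%bb.2, Cond={-1, CBZX, x0}
//   "br x0"                   -> returns true (cannot analyze)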
510
511bool AArch64InstrInfo::analyzeBranchPredicate(MachineBasicBlock &MBB,
512 MachineBranchPredicate &MBP,
513 bool AllowModify) const {
514 // For the moment, handle only a block which ends with a cb(n)z followed by
515 // a fallthrough. Why this? Because it is a common form.
516 // TODO: Should we handle b.cc?
517
518 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
519 if (I == MBB.end())
520 return true;
521
522 // Skip over SpeculationBarrierEndBB terminators
523 if (I->getOpcode() == AArch64::SpeculationBarrierISBDSBEndBB ||
524 I->getOpcode() == AArch64::SpeculationBarrierSBEndBB) {
525 --I;
526 }
527
528 if (!isUnpredicatedTerminator(*I))
529 return true;
530
531 // Get the last instruction in the block.
532 MachineInstr *LastInst = &*I;
533 unsigned LastOpc = LastInst->getOpcode();
534 if (!isCondBranchOpcode(LastOpc))
535 return true;
536
537 switch (LastOpc) {
538 default:
539 return true;
540 case AArch64::CBZW:
541 case AArch64::CBZX:
542 case AArch64::CBNZW:
543 case AArch64::CBNZX:
544 break;
545 };
546
547 MBP.TrueDest = LastInst->getOperand(1).getMBB();
548 assert(MBP.TrueDest && "expected!");
549 MBP.FalseDest = MBB.getNextNode();
550
551 MBP.ConditionDef = nullptr;
552 MBP.SingleUseCondition = false;
553
554 MBP.LHS = LastInst->getOperand(0);
555 MBP.RHS = MachineOperand::CreateImm(0);
556 MBP.Predicate = (LastOpc == AArch64::CBNZX || LastOpc == AArch64::CBNZW)
557 ? MachineBranchPredicate::PRED_NE
558 : MachineBranchPredicate::PRED_EQ;
559 return false;
560}
561
562bool AArch64InstrInfo::reverseBranchCondition(
563 SmallVectorImpl<MachineOperand> &Cond) const {
564 if (Cond[0].getImm() != -1) {
565 // Regular Bcc
566 AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
567 Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
568 } else {
569 // Folded compare-and-branch
570 switch (Cond[1].getImm()) {
571 default:
572 llvm_unreachable("Unknown conditional branch!");
573 case AArch64::CBZW:
574 Cond[1].setImm(AArch64::CBNZW);
575 break;
576 case AArch64::CBNZW:
577 Cond[1].setImm(AArch64::CBZW);
578 break;
579 case AArch64::CBZX:
580 Cond[1].setImm(AArch64::CBNZX);
581 break;
582 case AArch64::CBNZX:
583 Cond[1].setImm(AArch64::CBZX);
584 break;
585 case AArch64::TBZW:
586 Cond[1].setImm(AArch64::TBNZW);
587 break;
588 case AArch64::TBNZW:
589 Cond[1].setImm(AArch64::TBZW);
590 break;
591 case AArch64::TBZX:
592 Cond[1].setImm(AArch64::TBNZX);
593 break;
594 case AArch64::TBNZX:
595 Cond[1].setImm(AArch64::TBZX);
596 break;
597
598 // Cond is { -1, Opcode, CC, Op0, Op1, ... }
599 case AArch64::CBWPri:
600 case AArch64::CBXPri:
601 case AArch64::CBBAssertExt:
602 case AArch64::CBHAssertExt:
603 case AArch64::CBWPrr:
604 case AArch64::CBXPrr: {
605 // Pseudos using standard 4-bit Arm condition codes
606 AArch64CC::CondCode CC =
607 static_cast<AArch64CC::CondCode>(Cond[2].getImm());
608 Cond[2].setImm(AArch64CC::getInvertedCondCode(CC));
609 }
610 }
611 }
612
613 return false;
614}
615
616unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
617 int *BytesRemoved) const {
618 MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
619 if (I == MBB.end())
620 return 0;
621
622 if (!isUncondBranchOpcode(I->getOpcode()) &&
623 !isCondBranchOpcode(I->getOpcode()))
624 return 0;
625
626 // Remove the branch.
627 I->eraseFromParent();
628
629 I = MBB.end();
630
631 if (I == MBB.begin()) {
632 if (BytesRemoved)
633 *BytesRemoved = 4;
634 return 1;
635 }
636 --I;
637 if (!isCondBranchOpcode(I->getOpcode())) {
638 if (BytesRemoved)
639 *BytesRemoved = 4;
640 return 1;
641 }
642
643 // Remove the branch.
644 I->eraseFromParent();
645 if (BytesRemoved)
646 *BytesRemoved = 8;
647
648 return 2;
649}
650
651void AArch64InstrInfo::instantiateCondBranch(
652 MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
653 ArrayRef<MachineOperand> Cond) const {
654 if (Cond[0].getImm() != -1) {
655 // Regular Bcc
656 BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
657 } else {
658 // Folded compare-and-branch
660 // Note that we use add() instead of addReg() to keep the operand flags.
660
661 // cbz, cbnz
662 const MachineInstrBuilder MIB =
663 BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
664
665 // tbz/tbnz
666 if (Cond.size() > 3)
667 MIB.add(Cond[3]);
668
669 // cb
670 if (Cond.size() > 4)
671 MIB.add(Cond[4]);
672
673 MIB.addMBB(TBB);
674
675 // cb[b,h]
676 if (Cond.size() > 5) {
677 MIB.addImm(Cond[5].getImm());
678 MIB.addImm(Cond[6].getImm());
679 }
680 }
681}
682
683unsigned AArch64InstrInfo::insertBranch(
684 MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
685 ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
686 // Shouldn't be a fall through.
687 assert(TBB && "insertBranch must not be told to insert a fallthrough");
688
689 if (!FBB) {
690 if (Cond.empty()) // Unconditional branch?
691 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
692 else
693 instantiateCondBranch(MBB, DL, TBB, Cond);
694
695 if (BytesAdded)
696 *BytesAdded = 4;
697
698 return 1;
699 }
700
701 // Two-way conditional branch.
702 instantiateCondBranch(MBB, DL, TBB, Cond);
703 BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
704
705 if (BytesAdded)
706 *BytesAdded = 8;
707
708 return 2;
709}
710
711// Find the original register that VReg is copied from.
712static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
713 while (Register::isVirtualRegister(VReg)) {
714 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
715 if (!DefMI->isFullCopy())
716 return VReg;
717 VReg = DefMI->getOperand(1).getReg();
718 }
719 return VReg;
720}
721
722// Determine if VReg is defined by an instruction that can be folded into a
723// csel instruction. If so, return the folded opcode, and the replacement
724// register.
725static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
726 unsigned *NewReg = nullptr) {
727 VReg = removeCopies(MRI, VReg);
728 if (!Register::isVirtualRegister(VReg))
729 return 0;
730
731 bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
732 const MachineInstr *DefMI = MRI.getVRegDef(VReg);
733 unsigned Opc = 0;
734 unsigned SrcReg = 0;
735 switch (DefMI->getOpcode()) {
736 case AArch64::SUBREG_TO_REG:
737 // Check for the following way to define an 64-bit immediate:
738 // %0:gpr32 = MOVi32imm 1
739 // %1:gpr64 = SUBREG_TO_REG 0, %0:gpr32, %subreg.sub_32
740 if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 0)
741 return 0;
742 if (!DefMI->getOperand(2).isReg())
743 return 0;
744 if (!DefMI->getOperand(3).isImm() ||
745 DefMI->getOperand(3).getImm() != AArch64::sub_32)
746 return 0;
747 DefMI = MRI.getVRegDef(DefMI->getOperand(2).getReg());
748 if (DefMI->getOpcode() != AArch64::MOVi32imm)
749 return 0;
750 if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 1)
751 return 0;
752 assert(Is64Bit);
753 SrcReg = AArch64::XZR;
754 Opc = AArch64::CSINCXr;
755 break;
756
757 case AArch64::MOVi32imm:
758 case AArch64::MOVi64imm:
759 if (!DefMI->getOperand(1).isImm() || DefMI->getOperand(1).getImm() != 1)
760 return 0;
761 SrcReg = Is64Bit ? AArch64::XZR : AArch64::WZR;
762 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
763 break;
764
765 case AArch64::ADDSXri:
766 case AArch64::ADDSWri:
767 // if NZCV is used, do not fold.
768 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
769 true) == -1)
770 return 0;
771 // fall-through to ADDXri and ADDWri.
772 [[fallthrough]];
773 case AArch64::ADDXri:
774 case AArch64::ADDWri:
775 // add x, 1 -> csinc.
776 if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
777 DefMI->getOperand(3).getImm() != 0)
778 return 0;
779 SrcReg = DefMI->getOperand(1).getReg();
780 Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
781 break;
782
783 case AArch64::ORNXrr:
784 case AArch64::ORNWrr: {
785 // not x -> csinv, represented as orn dst, xzr, src.
786 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
787 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
788 return 0;
789 SrcReg = DefMI->getOperand(2).getReg();
790 Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
791 break;
792 }
793
794 case AArch64::SUBSXrr:
795 case AArch64::SUBSWrr:
796 // if NZCV is used, do not fold.
797 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
798 true) == -1)
799 return 0;
800 // fall-through to SUBXrr and SUBWrr.
801 [[fallthrough]];
802 case AArch64::SUBXrr:
803 case AArch64::SUBWrr: {
804 // neg x -> csneg, represented as sub dst, xzr, src.
805 unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
806 if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
807 return 0;
808 SrcReg = DefMI->getOperand(2).getReg();
809 Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
810 break;
811 }
812 default:
813 return 0;
814 }
815 assert(Opc && SrcReg && "Missing parameters");
816
817 if (NewReg)
818 *NewReg = SrcReg;
819 return Opc;
820}
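// Illustrative folds recognized by canFoldIntoCSel (MIR sketch, virtual
// register names are placeholders):
//   %t = ADDWri %a, 1, 0   -> CSINCWr ..., %a, cc    (select of x + 1)
//   %t = ORNWrr $wzr, %a   -> CSINVWr ..., %a, cc    (select of ~x)
//   %t = SUBWrr $wzr, %a   -> CSNEGWr ..., %a, cc    (select of -x)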
821
822bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
823 ArrayRef<MachineOperand> Cond,
824 Register DstReg, Register TrueReg,
825 Register FalseReg, int &CondCycles,
826 int &TrueCycles,
827 int &FalseCycles) const {
828 // Check register classes.
829 const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
830 const TargetRegisterClass *RC =
831 RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
832 if (!RC)
833 return false;
834
835 // Also need to check the dest regclass, in case we're trying to optimize
836 // something like:
837 // %1(gpr) = PHI %2(fpr), bb1, %(fpr), bb2
838 if (!RI.getCommonSubClass(RC, MRI.getRegClass(DstReg)))
839 return false;
840
841 // Expanding cbz/tbz requires an extra cycle of latency on the condition.
842 unsigned ExtraCondLat = Cond.size() != 1;
843
844 // GPRs are handled by csel.
845 // FIXME: Fold in x+1, -x, and ~x when applicable.
846 if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
847 AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
848 // Single-cycle csel, csinc, csinv, and csneg.
849 CondCycles = 1 + ExtraCondLat;
850 TrueCycles = FalseCycles = 1;
851 if (canFoldIntoCSel(MRI, TrueReg))
852 TrueCycles = 0;
853 else if (canFoldIntoCSel(MRI, FalseReg))
854 FalseCycles = 0;
855 return true;
856 }
857
858 // Scalar floating point is handled by fcsel.
859 // FIXME: Form fabs, fmin, and fmax when applicable.
860 if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
861 AArch64::FPR32RegClass.hasSubClassEq(RC)) {
862 CondCycles = 5 + ExtraCondLat;
863 TrueCycles = FalseCycles = 2;
864 return true;
865 }
866
867 // Can't do vectors.
868 return false;
869}
870
871void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
872 MachineBasicBlock::iterator I,
873 const DebugLoc &DL, Register DstReg,
874 ArrayRef<MachineOperand> Cond,
875 Register TrueReg, Register FalseReg) const {
876 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
877
878 // Parse the condition code, see parseCondBranch() above.
879 AArch64CC::CondCode CC;
880 switch (Cond.size()) {
881 default:
882 llvm_unreachable("Unknown condition opcode in Cond");
883 case 1: // b.cc
884 CC = AArch64CC::CondCode(Cond[0].getImm());
885 break;
886 case 3: { // cbz/cbnz
887 // We must insert a compare against 0.
888 bool Is64Bit;
889 switch (Cond[1].getImm()) {
890 default:
891 llvm_unreachable("Unknown branch opcode in Cond");
892 case AArch64::CBZW:
893 Is64Bit = false;
894 CC = AArch64CC::EQ;
895 break;
896 case AArch64::CBZX:
897 Is64Bit = true;
898 CC = AArch64CC::EQ;
899 break;
900 case AArch64::CBNZW:
901 Is64Bit = false;
902 CC = AArch64CC::NE;
903 break;
904 case AArch64::CBNZX:
905 Is64Bit = true;
906 CC = AArch64CC::NE;
907 break;
908 }
909 Register SrcReg = Cond[2].getReg();
910 if (Is64Bit) {
911 // cmp reg, #0 is actually subs xzr, reg, #0.
912 MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
913 BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
914 .addReg(SrcReg)
915 .addImm(0)
916 .addImm(0);
917 } else {
918 MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
919 BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
920 .addReg(SrcReg)
921 .addImm(0)
922 .addImm(0);
923 }
924 break;
925 }
926 case 4: { // tbz/tbnz
927 // We must insert a tst instruction.
928 switch (Cond[1].getImm()) {
929 default:
930 llvm_unreachable("Unknown branch opcode in Cond");
931 case AArch64::TBZW:
932 case AArch64::TBZX:
933 CC = AArch64CC::EQ;
934 break;
935 case AArch64::TBNZW:
936 case AArch64::TBNZX:
937 CC = AArch64CC::NE;
938 break;
939 }
940 // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
941 if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
942 BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
943 .addReg(Cond[2].getReg())
944 .addImm(
945 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
946 else
947 BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
948 .addReg(Cond[2].getReg())
949 .addImm(
950 AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
951 break;
952 }
953 case 5: { // cb
954 // We must insert a cmp, that is a subs
955 // 0 1 2 3 4
956 // Cond is { -1, Opcode, CC, Op0, Op1 }
957
958 unsigned SubsOpc, SubsDestReg;
959 bool IsImm = false;
960 CC = static_cast<AArch64CC::CondCode>(Cond[2].getImm());
961 switch (Cond[1].getImm()) {
962 default:
963 llvm_unreachable("Unknown branch opcode in Cond");
964 case AArch64::CBWPri:
965 SubsOpc = AArch64::SUBSWri;
966 SubsDestReg = AArch64::WZR;
967 IsImm = true;
968 break;
969 case AArch64::CBXPri:
970 SubsOpc = AArch64::SUBSXri;
971 SubsDestReg = AArch64::XZR;
972 IsImm = true;
973 break;
974 case AArch64::CBWPrr:
975 SubsOpc = AArch64::SUBSWrr;
976 SubsDestReg = AArch64::WZR;
977 IsImm = false;
978 break;
979 case AArch64::CBXPrr:
980 SubsOpc = AArch64::SUBSXrr;
981 SubsDestReg = AArch64::XZR;
982 IsImm = false;
983 break;
984 }
985
986 if (IsImm)
987 BuildMI(MBB, I, DL, get(SubsOpc), SubsDestReg)
988 .addReg(Cond[3].getReg())
989 .addImm(Cond[4].getImm())
990 .addImm(0);
991 else
992 BuildMI(MBB, I, DL, get(SubsOpc), SubsDestReg)
993 .addReg(Cond[3].getReg())
994 .addReg(Cond[4].getReg());
995 } break;
996 case 7: { // cb[b,h]
997 // We must insert a cmp, that is a subs, but also zero- or sign-extensions
998 // that have been folded. For the first operand we codegen an explicit
999 // extension, for the second operand we fold the extension into cmp.
1000 // 0 1 2 3 4 5 6
1001 // Cond is { -1, Opcode, CC, Op0, Op1, Ext0, Ext1 }
1002
1003 // We need a new register for the now explicitly extended register
1004 Register Reg = Cond[4].getReg();
1006 unsigned ExtOpc;
1007 unsigned ExtBits;
1008 AArch64_AM::ShiftExtendType ExtendType =
1010 switch (ExtendType) {
1011 default:
1012 llvm_unreachable("Unknown shift-extend for CB instruction");
1013 case AArch64_AM::SXTB:
1014 assert(
1015 Cond[1].getImm() == AArch64::CBBAssertExt &&
1016 "Unexpected compare-and-branch instruction for SXTB shift-extend");
1017 ExtOpc = AArch64::SBFMWri;
1018 ExtBits = AArch64_AM::encodeLogicalImmediate(0xff, 32);
1019 break;
1020 case AArch64_AM::SXTH:
1021 assert(
1022 Cond[1].getImm() == AArch64::CBHAssertExt &&
1023 "Unexpected compare-and-branch instruction for SXTH shift-extend");
1024 ExtOpc = AArch64::SBFMWri;
1025 ExtBits = AArch64_AM::encodeLogicalImmediate(0xffff, 32);
1026 break;
1027 case AArch64_AM::UXTB:
1028 assert(
1029 Cond[1].getImm() == AArch64::CBBAssertExt &&
1030 "Unexpected compare-and-branch instruction for UXTB shift-extend");
1031 ExtOpc = AArch64::ANDWri;
1032 ExtBits = AArch64_AM::encodeLogicalImmediate(0xff, 32);
1033 break;
1034 case AArch64_AM::UXTH:
1035 assert(
1036 Cond[1].getImm() == AArch64::CBHAssertExt &&
1037 "Unexpected compare-and-branch instruction for UXTH shift-extend");
1038 ExtOpc = AArch64::ANDWri;
1039 ExtBits = AArch64_AM::encodeLogicalImmediate(0xffff, 32);
1040 break;
1041 }
1042
1043 // Build the explicit extension of the first operand
1044 Reg = MRI.createVirtualRegister(&AArch64::GPR32spRegClass);
1045 MachineInstrBuilder MBBI =
1046 BuildMI(MBB, I, DL, get(ExtOpc), Reg).addReg(Cond[4].getReg());
1047 if (ExtOpc != AArch64::ANDWri)
1048 MBBI.addImm(0);
1049 MBBI.addImm(ExtBits);
1050 }
1051
1052 // Now, subs with an extended second operand
1054 AArch64_AM::ShiftExtendType ExtendType =
1056 MRI.constrainRegClass(Reg, MRI.getRegClass(Cond[3].getReg()));
1057 MRI.constrainRegClass(Cond[3].getReg(), &AArch64::GPR32spRegClass);
1058 BuildMI(MBB, I, DL, get(AArch64::SUBSWrx), AArch64::WZR)
1059 .addReg(Cond[3].getReg())
1060 .addReg(Reg)
1061 .addImm(AArch64_AM::getArithExtendImm(ExtendType, 0));
1062 } // If no extension is needed, just a regular subs
1063 else {
1064 MRI.constrainRegClass(Reg, MRI.getRegClass(Cond[3].getReg()));
1065 MRI.constrainRegClass(Cond[3].getReg(), &AArch64::GPR32spRegClass);
1066 BuildMI(MBB, I, DL, get(AArch64::SUBSWrr), AArch64::WZR)
1067 .addReg(Cond[3].getReg())
1068 .addReg(Reg);
1069 }
1070
1071 CC = static_cast<AArch64CC::CondCode>(Cond[2].getImm());
1072 } break;
1073 }
1074
1075 unsigned Opc = 0;
1076 const TargetRegisterClass *RC = nullptr;
1077 bool TryFold = false;
1078 if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
1079 RC = &AArch64::GPR64RegClass;
1080 Opc = AArch64::CSELXr;
1081 TryFold = true;
1082 } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
1083 RC = &AArch64::GPR32RegClass;
1084 Opc = AArch64::CSELWr;
1085 TryFold = true;
1086 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
1087 RC = &AArch64::FPR64RegClass;
1088 Opc = AArch64::FCSELDrrr;
1089 } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
1090 RC = &AArch64::FPR32RegClass;
1091 Opc = AArch64::FCSELSrrr;
1092 }
1093 assert(RC && "Unsupported regclass");
1094
1095 // Try folding simple instructions into the csel.
1096 if (TryFold) {
1097 unsigned NewReg = 0;
1098 unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewReg);
1099 if (FoldedOpc) {
1100 // The folded opcodes csinc, csinv and csneg apply the operation to
1101 // FalseReg, so we need to invert the condition.
1102 CC = AArch64CC::getInvertedCondCode(CC);
1103 TrueReg = FalseReg;
1104 } else
1105 FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewReg);
1106
1107 // Fold the operation. Leave any dead instructions for DCE to clean up.
1108 if (FoldedOpc) {
1109 FalseReg = NewReg;
1110 Opc = FoldedOpc;
1111 // Extend the live range of NewReg.
1112 MRI.clearKillFlags(NewReg);
1113 }
1114 }
1115
1116 // Pull all virtual register into the appropriate class.
1117 MRI.constrainRegClass(TrueReg, RC);
1118 // FalseReg might be WZR or XZR if the folded operand is a literal 1.
1119 assert(
1120 (FalseReg.isVirtual() || FalseReg == AArch64::WZR ||
1121 FalseReg == AArch64::XZR) &&
1122 "FalseReg was folded into a non-virtual register other than WZR or XZR");
1123 if (FalseReg.isVirtual())
1124 MRI.constrainRegClass(FalseReg, RC);
1125
1126 // Insert the csel.
1127 BuildMI(MBB, I, DL, get(Opc), DstReg)
1128 .addReg(TrueReg)
1129 .addReg(FalseReg)
1130 .addImm(CC);
1131}
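// Illustrative outcome (assuming a GPR32 select whose true value is "x + 1"):
// the ADDWri feeding TrueReg is folded, the condition is inverted, and a
// single "CSINCWr dst, y, x, inv(cc)" is emitted (y being the original false
// value) instead of a separate ADD plus CSEL.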
1132
1133// Return true if Imm can be loaded into a register by a "cheap" sequence of
1134// instructions. For now, "cheap" means at most two instructions.
1135static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize) {
1136 if (BitSize == 32)
1137 return true;
1138
1139 assert(BitSize == 64 && "Only bit sizes of 32 or 64 allowed");
1140 uint64_t Imm = static_cast<uint64_t>(MI.getOperand(1).getImm());
1141 SmallVector<AArch64_IMM::ImmInsnModel, 4> Is;
1142 AArch64_IMM::expandMOVImm(Imm, BitSize, Is);
1143
1144 return Is.size() <= 2;
1145}
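// Example (illustrative): a 64-bit value with two non-zero 16-bit chunks such
// as 0x12340000ABCD0000 expands to MOVZ+MOVK and is considered cheap, whereas
// 0x1234567887654321 needs MOVZ plus three MOVKs and is not.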
1146
1147// Check if a COPY instruction is cheap.
1148static bool isCheapCopy(const MachineInstr &MI, const AArch64RegisterInfo &RI) {
1149 assert(MI.isCopy() && "Expected COPY instruction");
1150 const MachineRegisterInfo &MRI = MI.getMF()->getRegInfo();
1151
1152 // Cross-bank copies (e.g., between GPR and FPR) are expensive on AArch64,
1153 // typically requiring an FMOV instruction with a 2-6 cycle latency.
1154 auto GetRegClass = [&](Register Reg) -> const TargetRegisterClass * {
1155 if (Reg.isVirtual())
1156 return MRI.getRegClass(Reg);
1157 if (Reg.isPhysical())
1158 return RI.getMinimalPhysRegClass(Reg);
1159 return nullptr;
1160 };
1161 const TargetRegisterClass *DstRC = GetRegClass(MI.getOperand(0).getReg());
1162 const TargetRegisterClass *SrcRC = GetRegClass(MI.getOperand(1).getReg());
1163 if (DstRC && SrcRC && !RI.getCommonSubClass(DstRC, SrcRC))
1164 return false;
1165
1166 return MI.isAsCheapAsAMove();
1167}
1168
1169// FIXME: this implementation should be micro-architecture dependent, so a
1170// micro-architecture target hook should be introduced here in future.
1171bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
1172 if (Subtarget.hasExynosCheapAsMoveHandling()) {
1173 if (isExynosCheapAsMove(MI))
1174 return true;
1175 return MI.isAsCheapAsAMove();
1176 }
1177
1178 switch (MI.getOpcode()) {
1179 default:
1180 return MI.isAsCheapAsAMove();
1181
1182 case TargetOpcode::COPY:
1183 return isCheapCopy(MI, RI);
1184
1185 case AArch64::ADDWrs:
1186 case AArch64::ADDXrs:
1187 case AArch64::SUBWrs:
1188 case AArch64::SUBXrs:
1189 return Subtarget.hasALULSLFast() && MI.getOperand(3).getImm() <= 4;
1190
1191 // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
1192 // ORRXri, it is as cheap as MOV.
1193 // Likewise if it can be expanded to MOVZ/MOVN/MOVK.
1194 case AArch64::MOVi32imm:
1195 return isCheapImmediate(MI, 32);
1196 case AArch64::MOVi64imm:
1197 return isCheapImmediate(MI, 64);
1198 }
1199}
1200
1201bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) {
1202 switch (MI.getOpcode()) {
1203 default:
1204 return false;
1205
1206 case AArch64::ADDWrs:
1207 case AArch64::ADDXrs:
1208 case AArch64::ADDSWrs:
1209 case AArch64::ADDSXrs: {
1210 unsigned Imm = MI.getOperand(3).getImm();
1211 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1212 if (ShiftVal == 0)
1213 return true;
1214 return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
1215 }
1216
1217 case AArch64::ADDWrx:
1218 case AArch64::ADDXrx:
1219 case AArch64::ADDXrx64:
1220 case AArch64::ADDSWrx:
1221 case AArch64::ADDSXrx:
1222 case AArch64::ADDSXrx64: {
1223 unsigned Imm = MI.getOperand(3).getImm();
1224 switch (AArch64_AM::getArithExtendType(Imm)) {
1225 default:
1226 return false;
1227 case AArch64_AM::UXTB:
1228 case AArch64_AM::UXTH:
1229 case AArch64_AM::UXTW:
1230 case AArch64_AM::UXTX:
1231 return AArch64_AM::getArithShiftValue(Imm) <= 4;
1232 }
1233 }
1234
1235 case AArch64::SUBWrs:
1236 case AArch64::SUBSWrs: {
1237 unsigned Imm = MI.getOperand(3).getImm();
1238 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1239 return ShiftVal == 0 ||
1240 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
1241 }
1242
1243 case AArch64::SUBXrs:
1244 case AArch64::SUBSXrs: {
1245 unsigned Imm = MI.getOperand(3).getImm();
1246 unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
1247 return ShiftVal == 0 ||
1248 (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
1249 }
1250
1251 case AArch64::SUBWrx:
1252 case AArch64::SUBXrx:
1253 case AArch64::SUBXrx64:
1254 case AArch64::SUBSWrx:
1255 case AArch64::SUBSXrx:
1256 case AArch64::SUBSXrx64: {
1257 unsigned Imm = MI.getOperand(3).getImm();
1258 switch (AArch64_AM::getArithExtendType(Imm)) {
1259 default:
1260 return false;
1261 case AArch64_AM::UXTB:
1262 case AArch64_AM::UXTH:
1263 case AArch64_AM::UXTW:
1264 case AArch64_AM::UXTX:
1265 return AArch64_AM::getArithShiftValue(Imm) == 0;
1266 }
1267 }
1268
1269 case AArch64::LDRBBroW:
1270 case AArch64::LDRBBroX:
1271 case AArch64::LDRBroW:
1272 case AArch64::LDRBroX:
1273 case AArch64::LDRDroW:
1274 case AArch64::LDRDroX:
1275 case AArch64::LDRHHroW:
1276 case AArch64::LDRHHroX:
1277 case AArch64::LDRHroW:
1278 case AArch64::LDRHroX:
1279 case AArch64::LDRQroW:
1280 case AArch64::LDRQroX:
1281 case AArch64::LDRSBWroW:
1282 case AArch64::LDRSBWroX:
1283 case AArch64::LDRSBXroW:
1284 case AArch64::LDRSBXroX:
1285 case AArch64::LDRSHWroW:
1286 case AArch64::LDRSHWroX:
1287 case AArch64::LDRSHXroW:
1288 case AArch64::LDRSHXroX:
1289 case AArch64::LDRSWroW:
1290 case AArch64::LDRSWroX:
1291 case AArch64::LDRSroW:
1292 case AArch64::LDRSroX:
1293 case AArch64::LDRWroW:
1294 case AArch64::LDRWroX:
1295 case AArch64::LDRXroW:
1296 case AArch64::LDRXroX:
1297 case AArch64::PRFMroW:
1298 case AArch64::PRFMroX:
1299 case AArch64::STRBBroW:
1300 case AArch64::STRBBroX:
1301 case AArch64::STRBroW:
1302 case AArch64::STRBroX:
1303 case AArch64::STRDroW:
1304 case AArch64::STRDroX:
1305 case AArch64::STRHHroW:
1306 case AArch64::STRHHroX:
1307 case AArch64::STRHroW:
1308 case AArch64::STRHroX:
1309 case AArch64::STRQroW:
1310 case AArch64::STRQroX:
1311 case AArch64::STRSroW:
1312 case AArch64::STRSroX:
1313 case AArch64::STRWroW:
1314 case AArch64::STRWroX:
1315 case AArch64::STRXroW:
1316 case AArch64::STRXroX: {
1317 unsigned IsSigned = MI.getOperand(3).getImm();
1318 return !IsSigned;
1319 }
1320 }
1321}
1322
1323bool AArch64InstrInfo::isSEHInstruction(const MachineInstr &MI) {
1324 unsigned Opc = MI.getOpcode();
1325 switch (Opc) {
1326 default:
1327 return false;
1328 case AArch64::SEH_StackAlloc:
1329 case AArch64::SEH_SaveFPLR:
1330 case AArch64::SEH_SaveFPLR_X:
1331 case AArch64::SEH_SaveReg:
1332 case AArch64::SEH_SaveReg_X:
1333 case AArch64::SEH_SaveRegP:
1334 case AArch64::SEH_SaveRegP_X:
1335 case AArch64::SEH_SaveFReg:
1336 case AArch64::SEH_SaveFReg_X:
1337 case AArch64::SEH_SaveFRegP:
1338 case AArch64::SEH_SaveFRegP_X:
1339 case AArch64::SEH_SetFP:
1340 case AArch64::SEH_AddFP:
1341 case AArch64::SEH_Nop:
1342 case AArch64::SEH_PrologEnd:
1343 case AArch64::SEH_EpilogStart:
1344 case AArch64::SEH_EpilogEnd:
1345 case AArch64::SEH_PACSignLR:
1346 case AArch64::SEH_SaveAnyRegI:
1347 case AArch64::SEH_SaveAnyRegIP:
1348 case AArch64::SEH_SaveAnyRegQP:
1349 case AArch64::SEH_SaveAnyRegQPX:
1350 case AArch64::SEH_AllocZ:
1351 case AArch64::SEH_SaveZReg:
1352 case AArch64::SEH_SavePReg:
1353 return true;
1354 }
1355}
1356
1357bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
1358 Register &SrcReg, Register &DstReg,
1359 unsigned &SubIdx) const {
1360 switch (MI.getOpcode()) {
1361 default:
1362 return false;
1363 case AArch64::SBFMXri: // aka sxtw
1364 case AArch64::UBFMXri: // aka uxtw
1365 // Check for the 32 -> 64 bit extension case, these instructions can do
1366 // much more.
1367 if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1368 return false;
1369 // This is a signed or unsigned 32 -> 64 bit extension.
1370 SrcReg = MI.getOperand(1).getReg();
1371 DstReg = MI.getOperand(0).getReg();
1372 SubIdx = AArch64::sub_32;
1373 return true;
1374 }
1375}
1376
1377bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
1378 const MachineInstr &MIa, const MachineInstr &MIb) const {
1379 const TargetRegisterInfo *TRI = &getRegisterInfo();
1380 const MachineOperand *BaseOpA = nullptr, *BaseOpB = nullptr;
1381 int64_t OffsetA = 0, OffsetB = 0;
1382 TypeSize WidthA(0, false), WidthB(0, false);
1383 bool OffsetAIsScalable = false, OffsetBIsScalable = false;
1384
1385 assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1386 assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1387
1388 if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1389 MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1390 return false;
1391
1392 // Retrieve the base, offset from the base and width. Width
1393 // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
1394 // the bases are identical, and the offset of a lower memory access +
1395 // the width doesn't overlap the offset of a higher memory access,
1396 // then the memory accesses are different.
1397 // If OffsetAIsScalable and OffsetBIsScalable are both true, they
1398 // are assumed to have the same scale (vscale).
1399 if (getMemOperandWithOffsetWidth(MIa, BaseOpA, OffsetA, OffsetAIsScalable,
1400 WidthA, TRI) &&
1401 getMemOperandWithOffsetWidth(MIb, BaseOpB, OffsetB, OffsetBIsScalable,
1402 WidthB, TRI)) {
1403 if (BaseOpA->isIdenticalTo(*BaseOpB) &&
1404 OffsetAIsScalable == OffsetBIsScalable) {
1405 int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1406 int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1407 TypeSize LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1408 if (LowWidth.isScalable() == OffsetAIsScalable &&
1409 LowOffset + (int)LowWidth.getKnownMinValue() <= HighOffset)
1410 return true;
1411 }
1412 }
1413 return false;
1414}
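// Example (illustrative): two stores to the same frame index, e.g.
// "STRXui %v, %stack.0, 0" and "STRXui %v, %stack.0, 1", share a base, are
// 8 bytes wide and have scaled offsets 0 and 8, so they are reported disjoint.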
1415
1416bool AArch64InstrInfo::isSchedulingBoundary(const MachineInstr &MI,
1417 const MachineBasicBlock *MBB,
1418 const MachineFunction &MF) const {
1419 if (TargetInstrInfo::isSchedulingBoundary(MI, MBB, MF))
1420 return true;
1421
1422 // Do not move an instruction that can be recognized as a branch target.
1423 if (hasBTISemantics(MI))
1424 return true;
1425
1426 switch (MI.getOpcode()) {
1427 case AArch64::HINT:
1428 // CSDB hints are scheduling barriers.
1429 if (MI.getOperand(0).getImm() == 0x14)
1430 return true;
1431 break;
1432 case AArch64::DSB:
1433 case AArch64::ISB:
1434 // DSB and ISB also are scheduling barriers.
1435 return true;
1436 case AArch64::MSRpstatesvcrImm1:
1437 // SMSTART and SMSTOP are also scheduling barriers.
1438 return true;
1439 default:;
1440 }
1441 if (isSEHInstruction(MI))
1442 return true;
1443 auto Next = std::next(MI.getIterator());
1444 return Next != MBB->end() && Next->isCFIInstruction();
1445}
1446
1447/// analyzeCompare - For a comparison instruction, return the source registers
1448/// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1449/// Return true if the comparison instruction can be analyzed.
1450bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, Register &SrcReg,
1451 Register &SrcReg2, int64_t &CmpMask,
1452 int64_t &CmpValue) const {
1453 // The first operand can be a frame index where we'd normally expect a
1454 // register.
1455 // FIXME: Pass subregisters out of analyzeCompare
1456 assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1457 if (!MI.getOperand(1).isReg() || MI.getOperand(1).getSubReg())
1458 return false;
1459
1460 switch (MI.getOpcode()) {
1461 default:
1462 break;
1463 case AArch64::PTEST_PP:
1464 case AArch64::PTEST_PP_ANY:
1465 case AArch64::PTEST_PP_FIRST:
1466 SrcReg = MI.getOperand(0).getReg();
1467 SrcReg2 = MI.getOperand(1).getReg();
1468 if (MI.getOperand(2).getSubReg())
1469 return false;
1470
1471 // Not sure about the mask and value for now...
1472 CmpMask = ~0;
1473 CmpValue = 0;
1474 return true;
1475 case AArch64::SUBSWrr:
1476 case AArch64::SUBSWrs:
1477 case AArch64::SUBSWrx:
1478 case AArch64::SUBSXrr:
1479 case AArch64::SUBSXrs:
1480 case AArch64::SUBSXrx:
1481 case AArch64::ADDSWrr:
1482 case AArch64::ADDSWrs:
1483 case AArch64::ADDSWrx:
1484 case AArch64::ADDSXrr:
1485 case AArch64::ADDSXrs:
1486 case AArch64::ADDSXrx:
1487 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1488 SrcReg = MI.getOperand(1).getReg();
1489 SrcReg2 = MI.getOperand(2).getReg();
1490
1491 // FIXME: Pass subregisters out of analyzeCompare
1492 if (MI.getOperand(2).getSubReg())
1493 return false;
1494
1495 CmpMask = ~0;
1496 CmpValue = 0;
1497 return true;
1498 case AArch64::SUBSWri:
1499 case AArch64::ADDSWri:
1500 case AArch64::SUBSXri:
1501 case AArch64::ADDSXri:
1502 SrcReg = MI.getOperand(1).getReg();
1503 SrcReg2 = 0;
1504 CmpMask = ~0;
1505 CmpValue = MI.getOperand(2).getImm();
1506 return true;
1507 case AArch64::ANDSWri:
1508 case AArch64::ANDSXri:
1509 // ANDS does not use the same encoding scheme as the other xxxS
1510 // instructions.
1511 SrcReg = MI.getOperand(1).getReg();
1512 SrcReg2 = 0;
1513 CmpMask = ~0;
1514 CmpValue = AArch64_AM::decodeLogicalImmediate(
1515 MI.getOperand(2).getImm(),
1516 MI.getOpcode() == AArch64::ANDSWri ? 32 : 64);
1517 return true;
1518 }
1519
1520 return false;
1521}
1522
1523static bool UpdateOperandRegClass(MachineInstr &Instr) {
1524 MachineBasicBlock *MBB = Instr.getParent();
1525 assert(MBB && "Can't get MachineBasicBlock here");
1526 MachineFunction *MF = MBB->getParent();
1527 assert(MF && "Can't get MachineFunction here");
1528 const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1529 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1530 MachineRegisterInfo *MRI = &MF->getRegInfo();
1531
1532 for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1533 ++OpIdx) {
1534 MachineOperand &MO = Instr.getOperand(OpIdx);
1535 const TargetRegisterClass *OpRegCstraints =
1536 Instr.getRegClassConstraint(OpIdx, TII, TRI);
1537
1538 // If there's no constraint, there's nothing to do.
1539 if (!OpRegCstraints)
1540 continue;
1541 // If the operand is a frame index, there's nothing to do here.
1542 // A frame index operand will resolve correctly during PEI.
1543 if (MO.isFI())
1544 continue;
1545
1546 assert(MO.isReg() &&
1547 "Operand has register constraints without being a register!");
1548
1549 Register Reg = MO.getReg();
1550 if (Reg.isPhysical()) {
1551 if (!OpRegCstraints->contains(Reg))
1552 return false;
1553 } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1554 !MRI->constrainRegClass(Reg, OpRegCstraints))
1555 return false;
1556 }
1557
1558 return true;
1559}
1560
1561/// Return the opcode that does not set flags when possible - otherwise
1562/// return the original opcode. The caller is responsible to do the actual
1563/// substitution and legality checking.
1564unsigned AArch64InstrInfo::convertToNonFlagSettingOpc(const MachineInstr &MI) {
1565 // Don't convert all compare instructions, because for some the zero register
1566 // encoding becomes the sp register.
1567 bool MIDefinesZeroReg = false;
1568 if (MI.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1569 MI.definesRegister(AArch64::XZR, /*TRI=*/nullptr))
1570 MIDefinesZeroReg = true;
1571
1572 switch (MI.getOpcode()) {
1573 default:
1574 return MI.getOpcode();
1575 case AArch64::ADDSWrr:
1576 return AArch64::ADDWrr;
1577 case AArch64::ADDSWri:
1578 return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1579 case AArch64::ADDSWrs:
1580 return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1581 case AArch64::ADDSWrx:
1582 return AArch64::ADDWrx;
1583 case AArch64::ADDSXrr:
1584 return AArch64::ADDXrr;
1585 case AArch64::ADDSXri:
1586 return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1587 case AArch64::ADDSXrs:
1588 return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1589 case AArch64::ADDSXrx:
1590 return AArch64::ADDXrx;
1591 case AArch64::SUBSWrr:
1592 return AArch64::SUBWrr;
1593 case AArch64::SUBSWri:
1594 return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1595 case AArch64::SUBSWrs:
1596 return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1597 case AArch64::SUBSWrx:
1598 return AArch64::SUBWrx;
1599 case AArch64::SUBSXrr:
1600 return AArch64::SUBXrr;
1601 case AArch64::SUBSXri:
1602 return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1603 case AArch64::SUBSXrs:
1604 return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1605 case AArch64::SUBSXrx:
1606 return AArch64::SUBXrx;
1607 }
1608}
1609
1610enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1611
1612/// True when condition flags are accessed (either by writing or reading)
1613/// on the instruction trace starting at From and ending at To.
1614///
1615/// Note: If From and To are from different blocks it's assumed CC are accessed
1616/// on the path.
1619 const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1620 // Early exit if To is at the beginning of the BB.
1621 if (To == To->getParent()->begin())
1622 return true;
1623
1624 // Check whether the instructions are in the same basic block
1625 // If not, assume the condition flags might get modified somewhere.
1626 if (To->getParent() != From->getParent())
1627 return true;
1628
1629 // From must be above To.
1630 assert(std::any_of(
1631 ++To.getReverse(), To->getParent()->rend(),
1632 [From](MachineInstr &MI) { return MI.getIterator() == From; }));
1633
1634 // We iterate backward starting at \p To until we hit \p From.
1635 for (const MachineInstr &Instr :
1637 if (((AccessToCheck & AK_Write) &&
1638 Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1639 ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1640 return true;
1641 }
1642 return false;
1643}
1644
1645std::optional<unsigned>
1646AArch64InstrInfo::canRemovePTestInstr(MachineInstr *PTest, MachineInstr *Mask,
1647 MachineInstr *Pred,
1648 const MachineRegisterInfo *MRI) const {
1649 unsigned MaskOpcode = Mask->getOpcode();
1650 unsigned PredOpcode = Pred->getOpcode();
1651 bool PredIsPTestLike = isPTestLikeOpcode(PredOpcode);
1652 bool PredIsWhileLike = isWhileOpcode(PredOpcode);
1653
1654 if (PredIsWhileLike) {
1655 // For PTEST(PG, PG), PTEST is redundant when PG is the result of a WHILEcc
1656 // instruction and the condition is "any" since WHILEcc does an implicit
1657 // PTEST(ALL, PG) check and PG is always a subset of ALL.
1658 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1659 return PredOpcode;
1660
1661 // For PTEST(PTRUE_ALL, WHILE), if the element size matches, the PTEST is
1662 // redundant since WHILE performs an implicit PTEST with an all active
1663 // mask.
1664 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1665 getElementSizeForOpcode(MaskOpcode) ==
1666 getElementSizeForOpcode(PredOpcode))
1667 return PredOpcode;
1668
1669 // For PTEST_FIRST(PTRUE_ALL, WHILE), the PTEST_FIRST is redundant since
1670 // WHILEcc performs an implicit PTEST with an all active mask, setting
1671 // the N flag as the PTEST_FIRST would.
1672 if (PTest->getOpcode() == AArch64::PTEST_PP_FIRST &&
1673 isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31)
1674 return PredOpcode;
1675
1676 return {};
1677 }
1678
1679 if (PredIsPTestLike) {
1680 // For PTEST(PG, PG), PTEST is redundant when PG is the result of an
1681 // instruction that sets the flags as PTEST would and the condition is
1682 // "any" since PG is always a subset of the governing predicate of the
1683 // ptest-like instruction.
1684 if ((Mask == Pred) && PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1685 return PredOpcode;
1686
1687 auto PTestLikeMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1688
1689 // If the PTEST like instruction's general predicate is not `Mask`, attempt
1690 // to look through a copy and try again. This is because some instructions
1691 // take a predicate whose register class is a subset of its result class.
1692 if (Mask != PTestLikeMask && PTestLikeMask->isFullCopy() &&
1693 PTestLikeMask->getOperand(1).getReg().isVirtual())
1694 PTestLikeMask =
1695 MRI->getUniqueVRegDef(PTestLikeMask->getOperand(1).getReg());
1696
1697 // For PTEST(PTRUE_ALL, PTEST_LIKE), the PTEST is redundant if the
1698 // element size matches and either the PTEST_LIKE instruction uses
1699 // the same all active mask or the condition is "any".
1700 if (isPTrueOpcode(MaskOpcode) && Mask->getOperand(1).getImm() == 31 &&
1701 getElementSizeForOpcode(MaskOpcode) ==
1702 getElementSizeForOpcode(PredOpcode)) {
1703 if (Mask == PTestLikeMask || PTest->getOpcode() == AArch64::PTEST_PP_ANY)
1704 return PredOpcode;
1705 }
1706
1707 // For PTEST(PG, PTEST_LIKE(PG, ...)), the PTEST is redundant since the
1708 // flags are set based on the same mask 'PG', but PTEST_LIKE must operate
1709 // on 8-bit predicates like the PTEST. Otherwise, for instructions like
1710 // compare that also support 16/32/64-bit predicates, the implicit PTEST
1711 // performed by the compare could consider fewer lanes for these element
1712 // sizes.
1713 //
1714 // For example, consider
1715 //
1716 // ptrue p0.b ; P0=1111-1111-1111-1111
1717 // index z0.s, #0, #1 ; Z0=<0,1,2,3>
1718 // index z1.s, #1, #1 ; Z1=<1,2,3,4>
1719 // cmphi p1.s, p0/z, z1.s, z0.s ; P1=0001-0001-0001-0001
1720 // ; ^ last active
1721 // ptest p0, p1.b ; P1=0001-0001-0001-0001
1722 // ; ^ last active
1723 //
1724 // where the compare generates a canonical all active 32-bit predicate
1725 // (equivalent to 'ptrue p1.s, all'). The implicit PTEST sets the last
1726 // active flag, whereas the PTEST instruction with the same mask doesn't.
1727 // For PTEST_ANY this doesn't apply as the flags in this case would be
1728 // identical regardless of element size.
1729 uint64_t PredElementSize = getElementSizeForOpcode(PredOpcode);
1730 if (Mask == PTestLikeMask && (PredElementSize == AArch64::ElementSizeB ||
1731 PTest->getOpcode() == AArch64::PTEST_PP_ANY))
1732 return PredOpcode;
1733
1734 return {};
1735 }
1736
1737 // If OP in PTEST(PG, OP(PG, ...)) has a flag-setting variant change the
1738 // opcode so the PTEST becomes redundant.
1739 switch (PredOpcode) {
1740 case AArch64::AND_PPzPP:
1741 case AArch64::BIC_PPzPP:
1742 case AArch64::EOR_PPzPP:
1743 case AArch64::NAND_PPzPP:
1744 case AArch64::NOR_PPzPP:
1745 case AArch64::ORN_PPzPP:
1746 case AArch64::ORR_PPzPP:
1747 case AArch64::BRKA_PPzP:
1748 case AArch64::BRKPA_PPzPP:
1749 case AArch64::BRKB_PPzP:
1750 case AArch64::BRKPB_PPzPP:
1751 case AArch64::RDFFR_PPz: {
1752 // Check to see if our mask is the same. If not the resulting flag bits
1753 // may be different and we can't remove the ptest.
1754 auto *PredMask = MRI->getUniqueVRegDef(Pred->getOperand(1).getReg());
1755 if (Mask != PredMask)
1756 return {};
1757 break;
1758 }
1759 case AArch64::BRKN_PPzP: {
1760 // BRKN uses an all active implicit mask to set flags unlike the other
1761 // flag-setting instructions.
1762 // PTEST(PTRUE_B(31), BRKN(PG, A, B)) -> BRKNS(PG, A, B).
1763 if ((MaskOpcode != AArch64::PTRUE_B) ||
1764 (Mask->getOperand(1).getImm() != 31))
1765 return {};
1766 break;
1767 }
1768 case AArch64::PTRUE_B:
1769 // PTEST(OP=PTRUE_B(A), OP) -> PTRUES_B(A)
1770 break;
1771 default:
1772 // Bail out if we don't recognize the input
1773 return {};
1774 }
1775
1776 return convertToFlagSettingOpc(PredOpcode);
1777}
1778
1779/// optimizePTestInstr - Attempt to remove a ptest of a predicate-generating
1780/// operation which could set the flags in an identical manner
1781bool AArch64InstrInfo::optimizePTestInstr(
1782 MachineInstr *PTest, unsigned MaskReg, unsigned PredReg,
1783 const MachineRegisterInfo *MRI) const {
1784 auto *Mask = MRI->getUniqueVRegDef(MaskReg);
1785 auto *Pred = MRI->getUniqueVRegDef(PredReg);
1786
1787 if (Pred->isCopy() && PTest->getOpcode() == AArch64::PTEST_PP_FIRST) {
1788 // Instructions which return a multi-vector (e.g. WHILECC_x2) require copies
1789 // before the branch to extract each subregister.
1790 auto Op = Pred->getOperand(1);
1791 if (Op.isReg() && Op.getReg().isVirtual() &&
1792 Op.getSubReg() == AArch64::psub0)
1793 Pred = MRI->getUniqueVRegDef(Op.getReg());
1794 }
1795
1796 unsigned PredOpcode = Pred->getOpcode();
1797 auto NewOp = canRemovePTestInstr(PTest, Mask, Pred, MRI);
1798 if (!NewOp)
1799 return false;
1800
1801 const TargetRegisterInfo *TRI = &getRegisterInfo();
1802
1803 // If another instruction between Pred and PTest accesses flags, don't remove
1804 // the ptest or update the earlier instruction to modify them.
1805 if (areCFlagsAccessedBetweenInstrs(Pred, PTest, TRI))
1806 return false;
1807
1808 // If we pass all the checks, it's safe to remove the PTEST and use the flags
1809 // as they are prior to PTEST. Sometimes this requires the tested PTEST
1810 // operand to be replaced with an equivalent instruction that also sets the
1811 // flags.
1812 PTest->eraseFromParent();
1813 if (*NewOp != PredOpcode) {
1814 Pred->setDesc(get(*NewOp));
1815 bool succeeded = UpdateOperandRegClass(*Pred);
1816 (void)succeeded;
1817 assert(succeeded && "Operands have incompatible register classes!");
1818 Pred->addRegisterDefined(AArch64::NZCV, TRI);
1819 }
1820
1821 // Ensure that the flags def is live.
1822 if (Pred->registerDefIsDead(AArch64::NZCV, TRI)) {
1823 unsigned i = 0, e = Pred->getNumOperands();
1824 for (; i != e; ++i) {
1825 MachineOperand &MO = Pred->getOperand(i);
1826 if (MO.isReg() && MO.isDef() && MO.getReg() == AArch64::NZCV) {
1827 MO.setIsDead(false);
1828 break;
1829 }
1830 }
1831 }
1832 return true;
1833}
1834
1835/// Try to optimize a compare instruction. A compare instruction is an
1836/// instruction which produces AArch64::NZCV. It is a true compare
1837/// instruction when there are no uses of its destination register.
1839///
1840/// The following steps are tried in order:
1841/// 1. Convert CmpInstr into its non-flag-setting form when NZCV is not used.
1842/// 2. Remove CmpInstr if above there is an instruction producing a needed
1843/// condition code or an instruction which can be converted into such an
1844/// instruction.
1845/// Only comparison with zero is supported.
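///
/// An illustrative sketch of step 2 (register names assumed, not taken from an
/// actual test; the Z flag of the SUBS already matches that of the compare):
/// \code
///   subs w8, w1, w2
///   cmp  w8, #0
///   b.eq <bb>
/// \endcode
/// becomes
/// \code
///   subs w8, w1, w2
///   b.eq <bb>
/// \endcode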
1846bool AArch64InstrInfo::optimizeCompareInstr(
1847    MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask,
1848 int64_t CmpValue, const MachineRegisterInfo *MRI) const {
1849 assert(CmpInstr.getParent());
1850 assert(MRI);
1851
1852 // Replace SUBSWrr with SUBWrr if NZCV is not used.
1853 int DeadNZCVIdx =
1854 CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
1855 if (DeadNZCVIdx != -1) {
1856 if (CmpInstr.definesRegister(AArch64::WZR, /*TRI=*/nullptr) ||
1857 CmpInstr.definesRegister(AArch64::XZR, /*TRI=*/nullptr)) {
1858 CmpInstr.eraseFromParent();
1859 return true;
1860 }
1861 unsigned Opc = CmpInstr.getOpcode();
1862 unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1863 if (NewOpc == Opc)
1864 return false;
1865 const MCInstrDesc &MCID = get(NewOpc);
1866 CmpInstr.setDesc(MCID);
1867 CmpInstr.removeOperand(DeadNZCVIdx);
1868 bool succeeded = UpdateOperandRegClass(CmpInstr);
1869 (void)succeeded;
1870 assert(succeeded && "Some operands reg class are incompatible!");
1871 return true;
1872 }
1873
1874 if (CmpInstr.getOpcode() == AArch64::PTEST_PP ||
1875 CmpInstr.getOpcode() == AArch64::PTEST_PP_ANY ||
1876 CmpInstr.getOpcode() == AArch64::PTEST_PP_FIRST)
1877 return optimizePTestInstr(&CmpInstr, SrcReg, SrcReg2, MRI);
1878
1879 if (SrcReg2 != 0)
1880 return false;
1881
1882  // CmpInstr is a compare instruction if its destination register is not used.
1883 if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1884 return false;
1885
1886 if (CmpValue == 0 && substituteCmpToZero(CmpInstr, SrcReg, *MRI))
1887 return true;
1888 return (CmpValue == 0 || CmpValue == 1) &&
1889 removeCmpToZeroOrOne(CmpInstr, SrcReg, CmpValue, *MRI);
1890}
1891
1892/// Get opcode of S version of Instr.
1893/// If Instr is S version its opcode is returned.
1894/// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have S version
1895/// or we are not interested in it.
1896static unsigned sForm(MachineInstr &Instr) {
1897 switch (Instr.getOpcode()) {
1898 default:
1899 return AArch64::INSTRUCTION_LIST_END;
1900
1901 case AArch64::ADDSWrr:
1902 case AArch64::ADDSWri:
1903 case AArch64::ADDSXrr:
1904 case AArch64::ADDSXri:
1905 case AArch64::ADDSWrx:
1906 case AArch64::ADDSXrx:
1907 case AArch64::SUBSWrr:
1908 case AArch64::SUBSWri:
1909 case AArch64::SUBSWrx:
1910 case AArch64::SUBSXrr:
1911 case AArch64::SUBSXri:
1912 case AArch64::SUBSXrx:
1913 case AArch64::ANDSWri:
1914 case AArch64::ANDSWrr:
1915 case AArch64::ANDSWrs:
1916 case AArch64::ANDSXri:
1917 case AArch64::ANDSXrr:
1918 case AArch64::ANDSXrs:
1919 case AArch64::BICSWrr:
1920 case AArch64::BICSXrr:
1921 case AArch64::BICSWrs:
1922 case AArch64::BICSXrs:
1923 return Instr.getOpcode();
1924
1925 case AArch64::ADDWrr:
1926 return AArch64::ADDSWrr;
1927 case AArch64::ADDWri:
1928 return AArch64::ADDSWri;
1929 case AArch64::ADDXrr:
1930 return AArch64::ADDSXrr;
1931 case AArch64::ADDXri:
1932 return AArch64::ADDSXri;
1933 case AArch64::ADDWrx:
1934 return AArch64::ADDSWrx;
1935 case AArch64::ADDXrx:
1936 return AArch64::ADDSXrx;
1937 case AArch64::ADCWr:
1938 return AArch64::ADCSWr;
1939 case AArch64::ADCXr:
1940 return AArch64::ADCSXr;
1941 case AArch64::SUBWrr:
1942 return AArch64::SUBSWrr;
1943 case AArch64::SUBWri:
1944 return AArch64::SUBSWri;
1945 case AArch64::SUBXrr:
1946 return AArch64::SUBSXrr;
1947 case AArch64::SUBXri:
1948 return AArch64::SUBSXri;
1949 case AArch64::SUBWrx:
1950 return AArch64::SUBSWrx;
1951 case AArch64::SUBXrx:
1952 return AArch64::SUBSXrx;
1953 case AArch64::SBCWr:
1954 return AArch64::SBCSWr;
1955 case AArch64::SBCXr:
1956 return AArch64::SBCSXr;
1957 case AArch64::ANDWri:
1958 return AArch64::ANDSWri;
1959 case AArch64::ANDXri:
1960 return AArch64::ANDSXri;
1961 case AArch64::ANDWrr:
1962 return AArch64::ANDSWrr;
1963 case AArch64::ANDWrs:
1964 return AArch64::ANDSWrs;
1965 case AArch64::ANDXrr:
1966 return AArch64::ANDSXrr;
1967 case AArch64::ANDXrs:
1968 return AArch64::ANDSXrs;
1969 case AArch64::BICWrr:
1970 return AArch64::BICSWrr;
1971 case AArch64::BICXrr:
1972 return AArch64::BICSXrr;
1973 case AArch64::BICWrs:
1974 return AArch64::BICSWrs;
1975 case AArch64::BICXrs:
1976 return AArch64::BICSXrs;
1977 }
1978}
1979
1980/// Check if AArch64::NZCV should be alive in successors of MBB.
1981static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB) {
1982  for (auto *BB : MBB->successors())
1983 if (BB->isLiveIn(AArch64::NZCV))
1984 return true;
1985 return false;
1986}
1987
1988/// \returns The condition code operand index for \p Instr if it is a branch
1989/// or select and -1 otherwise.
1990static int
1991findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr) {
1992  switch (Instr.getOpcode()) {
1993 default:
1994 return -1;
1995
1996 case AArch64::Bcc: {
1997 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
1998 assert(Idx >= 2);
1999 return Idx - 2;
2000 }
2001
2002 case AArch64::CSINVWr:
2003 case AArch64::CSINVXr:
2004 case AArch64::CSINCWr:
2005 case AArch64::CSINCXr:
2006 case AArch64::CSELWr:
2007 case AArch64::CSELXr:
2008 case AArch64::CSNEGWr:
2009 case AArch64::CSNEGXr:
2010 case AArch64::FCSELSrrr:
2011 case AArch64::FCSELDrrr: {
2012 int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV, /*TRI=*/nullptr);
2013 assert(Idx >= 1);
2014 return Idx - 1;
2015 }
2016 }
2017}
2018
2019/// Find a condition code used by the instruction.
2020/// Returns AArch64CC::Invalid if either the instruction does not use condition
2021/// codes or we don't optimize CmpInstr in the presence of such instructions.
2022static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
2023  int CCIdx = findCondCodeUseOperandIdxForBranchOrSelect(Instr);
2024  return CCIdx >= 0 ? static_cast<AArch64CC::CondCode>(
2025                          Instr.getOperand(CCIdx).getImm())
2026                    : AArch64CC::Invalid;
2027}
2028
2031 UsedNZCV UsedFlags;
2032 switch (CC) {
2033 default:
2034 break;
2035
2036 case AArch64CC::EQ: // Z set
2037 case AArch64CC::NE: // Z clear
2038 UsedFlags.Z = true;
2039 break;
2040
2041 case AArch64CC::HI: // Z clear and C set
2042 case AArch64CC::LS: // Z set or C clear
2043 UsedFlags.Z = true;
2044 [[fallthrough]];
2045 case AArch64CC::HS: // C set
2046 case AArch64CC::LO: // C clear
2047 UsedFlags.C = true;
2048 break;
2049
2050 case AArch64CC::MI: // N set
2051 case AArch64CC::PL: // N clear
2052 UsedFlags.N = true;
2053 break;
2054
2055 case AArch64CC::VS: // V set
2056 case AArch64CC::VC: // V clear
2057 UsedFlags.V = true;
2058 break;
2059
2060 case AArch64CC::GT: // Z clear, N and V the same
2061 case AArch64CC::LE: // Z set, N and V differ
2062 UsedFlags.Z = true;
2063 [[fallthrough]];
2064 case AArch64CC::GE: // N and V the same
2065 case AArch64CC::LT: // N and V differ
2066 UsedFlags.N = true;
2067 UsedFlags.V = true;
2068 break;
2069 }
2070 return UsedFlags;
2071}
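// Illustrative reading of the mapping above (sketch only):
// getUsedNZCV(AArch64CC::HI) reports Z and C as used, since HI ("unsigned
// higher") is defined as C set and Z clear, while getUsedNZCV(AArch64CC::EQ)
// reports only Z.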
2072
2073/// \returns Condition flags used after \p CmpInstr in its MachineBB if NZCV
2074/// flags are not alive in successors of the same \p CmpInstr and \p MI parent.
2075/// \returns std::nullopt otherwise.
2076///
2077/// Collects the instructions that use the flags in \p CCUseInstrs if provided.
2078std::optional<UsedNZCV>
2080 const TargetRegisterInfo &TRI,
2081 SmallVectorImpl<MachineInstr *> *CCUseInstrs) {
2082 MachineBasicBlock *CmpParent = CmpInstr.getParent();
2083 if (MI.getParent() != CmpParent)
2084 return std::nullopt;
2085
2086 if (areCFlagsAliveInSuccessors(CmpParent))
2087 return std::nullopt;
2088
2089 UsedNZCV NZCVUsedAfterCmp;
2090  for (MachineInstr &Instr : instructionsWithoutDebug(
2091           std::next(CmpInstr.getIterator()), CmpParent->instr_end())) {
2092 if (Instr.readsRegister(AArch64::NZCV, &TRI)) {
2093      AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
2094      if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
2095 return std::nullopt;
2096 NZCVUsedAfterCmp |= getUsedNZCV(CC);
2097 if (CCUseInstrs)
2098 CCUseInstrs->push_back(&Instr);
2099 }
2100 if (Instr.modifiesRegister(AArch64::NZCV, &TRI))
2101 break;
2102 }
2103 return NZCVUsedAfterCmp;
2104}
2105
2106static bool isADDSRegImm(unsigned Opcode) {
2107 return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
2108}
2109
2110static bool isSUBSRegImm(unsigned Opcode) {
2111 return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
2112}
2113
2114static bool isANDOpcode(MachineInstr &MI) {
2115  unsigned Opc = sForm(MI);
2116 switch (Opc) {
2117 case AArch64::ANDSWri:
2118 case AArch64::ANDSWrr:
2119 case AArch64::ANDSWrs:
2120 case AArch64::ANDSXri:
2121 case AArch64::ANDSXrr:
2122 case AArch64::ANDSXrs:
2123 case AArch64::BICSWrr:
2124 case AArch64::BICSXrr:
2125 case AArch64::BICSWrs:
2126 case AArch64::BICSXrs:
2127 return true;
2128 default:
2129 return false;
2130 }
2131}
2132
2133/// Check if CmpInstr can be substituted by MI.
2134///
2135/// CmpInstr can be substituted:
2136/// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
2137/// - and, MI and CmpInstr are from the same MachineBB
2138/// - and, condition flags are not alive in successors of the CmpInstr parent
2139/// - and, if MI opcode is the S form there must be no defs of flags between
2140/// MI and CmpInstr
2141/// or if MI opcode is not the S form there must be neither defs of flags
2142/// nor uses of flags between MI and CmpInstr.
2143/// - and, the C flag is not used after CmpInstr
2144///   and, if the V flag is used after CmpInstr, MI is an AND/BIC or produces
2145///   a poison value when signed overflow occurs (no-signed-wrap).
2146static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr,
2147                                       const TargetRegisterInfo &TRI) {
2148  // NOTE this assertion guarantees that MI.getOpcode() is an add, subtract or
2149  // logical operation that has (or can be converted to) a flag-setting form.
2150 assert(sForm(MI) != AArch64::INSTRUCTION_LIST_END);
2151
2152 const unsigned CmpOpcode = CmpInstr.getOpcode();
2153 if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
2154 return false;
2155
2156 assert((CmpInstr.getOperand(2).isImm() &&
2157 CmpInstr.getOperand(2).getImm() == 0) &&
2158 "Caller guarantees that CmpInstr compares with constant 0");
2159
2160 std::optional<UsedNZCV> NZVCUsed = examineCFlagsUse(MI, CmpInstr, TRI);
2161 if (!NZVCUsed || NZVCUsed->C)
2162 return false;
2163
2164 // CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0', and MI is either
2165 // '%vreg = add ...' or '%vreg = sub ...'.
2166 // Condition flag V is used to indicate signed overflow.
2167 // 1) MI and CmpInstr set N and V to the same value.
2168 // 2) If MI is add/sub with no-signed-wrap, it produces a poison value when
2169 // signed overflow occurs, so CmpInstr could still be simplified away.
2170 // Note that Ands and Bics instructions always clear the V flag.
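  // Worked example (illustrative, assumed 32-bit values): for w1 = INT32_MIN
  // and w2 = 1, 'subs w8, w1, w2' overflows and sets V, while a later
  // 'cmp w8, #0' always clears V; a V-using condition such as GE would then
  // disagree between the two, which is why the no-signed-wrap (poison)
  // guarantee is required whenever V is consumed.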
2171 if (NZVCUsed->V && !MI.getFlag(MachineInstr::NoSWrap) && !isANDOpcode(MI))
2172 return false;
2173
2174 AccessKind AccessToCheck = AK_Write;
2175 if (sForm(MI) != MI.getOpcode())
2176 AccessToCheck = AK_All;
2177 return !areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AccessToCheck);
2178}
2179
2180/// Substitute an instruction comparing to zero with another instruction
2181/// which produces needed condition flags.
2182///
2183/// Return true on success.
2184bool AArch64InstrInfo::substituteCmpToZero(
2185 MachineInstr &CmpInstr, unsigned SrcReg,
2186 const MachineRegisterInfo &MRI) const {
2187 // Get the unique definition of SrcReg.
2188 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
2189 if (!MI)
2190 return false;
2191
2192 const TargetRegisterInfo &TRI = getRegisterInfo();
2193
2194 unsigned NewOpc = sForm(*MI);
2195 if (NewOpc == AArch64::INSTRUCTION_LIST_END)
2196 return false;
2197
2198 if (!canInstrSubstituteCmpInstr(*MI, CmpInstr, TRI))
2199 return false;
2200
2201 // Update the instruction to set NZCV.
2202 MI->setDesc(get(NewOpc));
2203 CmpInstr.eraseFromParent();
2204  bool succeeded = UpdateOperandRegClass(*MI);
2205  (void)succeeded;
2206 assert(succeeded && "Some operands reg class are incompatible!");
2207 MI->addRegisterDefined(AArch64::NZCV, &TRI);
2208 return true;
2209}
2210
2211/// \returns True if \p CmpInstr can be removed.
2212///
2213/// \p IsInvertCC is true if, after removing \p CmpInstr, condition
2214/// codes used in \p CCUseInstrs must be inverted.
2215static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr,
2216                                 int CmpValue, const TargetRegisterInfo &TRI,
2217                                 SmallVectorImpl<MachineInstr *> &CCUseInstrs,
2218                                 bool &IsInvertCC) {
2219 assert((CmpValue == 0 || CmpValue == 1) &&
2220 "Only comparisons to 0 or 1 considered for removal!");
2221
2222 // MI is 'CSINCWr %vreg, wzr, wzr, <cc>' or 'CSINCXr %vreg, xzr, xzr, <cc>'
2223 unsigned MIOpc = MI.getOpcode();
2224 if (MIOpc == AArch64::CSINCWr) {
2225 if (MI.getOperand(1).getReg() != AArch64::WZR ||
2226 MI.getOperand(2).getReg() != AArch64::WZR)
2227 return false;
2228 } else if (MIOpc == AArch64::CSINCXr) {
2229 if (MI.getOperand(1).getReg() != AArch64::XZR ||
2230 MI.getOperand(2).getReg() != AArch64::XZR)
2231 return false;
2232 } else {
2233 return false;
2234 }
2235  AArch64CC::CondCode MICC = findCondCodeUsedByInstr(MI);
2236  if (MICC == AArch64CC::Invalid)
2237 return false;
2238
2239 // NZCV needs to be defined
2240 if (MI.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) != -1)
2241 return false;
2242
2243 // CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0' or 'SUBS %vreg, 1'
2244 const unsigned CmpOpcode = CmpInstr.getOpcode();
2245 bool IsSubsRegImm = isSUBSRegImm(CmpOpcode);
2246 if (CmpValue && !IsSubsRegImm)
2247 return false;
2248 if (!CmpValue && !IsSubsRegImm && !isADDSRegImm(CmpOpcode))
2249 return false;
2250
2251 // MI conditions allowed: eq, ne, mi, pl
2252 UsedNZCV MIUsedNZCV = getUsedNZCV(MICC);
2253 if (MIUsedNZCV.C || MIUsedNZCV.V)
2254 return false;
2255
2256 std::optional<UsedNZCV> NZCVUsedAfterCmp =
2257 examineCFlagsUse(MI, CmpInstr, TRI, &CCUseInstrs);
2258  // Condition flags are not used in CmpInstr basic block successors, and only
2259  // the Z or N flags are allowed to be used after CmpInstr within its block.
2260 if (!NZCVUsedAfterCmp || NZCVUsedAfterCmp->C || NZCVUsedAfterCmp->V)
2261 return false;
2262 // Z or N flag used after CmpInstr must correspond to the flag used in MI
2263 if ((MIUsedNZCV.Z && NZCVUsedAfterCmp->N) ||
2264 (MIUsedNZCV.N && NZCVUsedAfterCmp->Z))
2265 return false;
2266 // If CmpInstr is comparison to zero MI conditions are limited to eq, ne
2267 if (MIUsedNZCV.N && !CmpValue)
2268 return false;
2269
2270 // There must be no defs of flags between MI and CmpInstr
2271 if (areCFlagsAccessedBetweenInstrs(&MI, &CmpInstr, &TRI, AK_Write))
2272 return false;
2273
2274 // Condition code is inverted in the following cases:
2275 // 1. MI condition is ne; CmpInstr is 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
2276 // 2. MI condition is eq, pl; CmpInstr is 'SUBS %vreg, 1'
2277 IsInvertCC = (CmpValue && (MICC == AArch64CC::EQ || MICC == AArch64CC::PL)) ||
2278 (!CmpValue && MICC == AArch64CC::NE);
2279 return true;
2280}
2281
2282/// Remove comparison in csinc-cmp sequence
2283///
2284/// Examples:
2285/// 1. \code
2286/// csinc w9, wzr, wzr, ne
2287/// cmp w9, #0
2288/// b.eq
2289/// \endcode
2290/// to
2291/// \code
2292/// csinc w9, wzr, wzr, ne
2293/// b.ne
2294/// \endcode
2295///
2296/// 2. \code
2297/// csinc x2, xzr, xzr, mi
2298/// cmp x2, #1
2299/// b.pl
2300/// \endcode
2301/// to
2302/// \code
2303/// csinc x2, xzr, xzr, mi
2304/// b.pl
2305/// \endcode
2306///
2307/// \param CmpInstr comparison instruction
2308/// \return True when comparison removed
2309bool AArch64InstrInfo::removeCmpToZeroOrOne(
2310 MachineInstr &CmpInstr, unsigned SrcReg, int CmpValue,
2311 const MachineRegisterInfo &MRI) const {
2312 MachineInstr *MI = MRI.getUniqueVRegDef(SrcReg);
2313 if (!MI)
2314 return false;
2315 const TargetRegisterInfo &TRI = getRegisterInfo();
2316 SmallVector<MachineInstr *, 4> CCUseInstrs;
2317 bool IsInvertCC = false;
2318 if (!canCmpInstrBeRemoved(*MI, CmpInstr, CmpValue, TRI, CCUseInstrs,
2319 IsInvertCC))
2320 return false;
2321 // Make transformation
2322 CmpInstr.eraseFromParent();
2323 if (IsInvertCC) {
2324 // Invert condition codes in CmpInstr CC users
2325 for (MachineInstr *CCUseInstr : CCUseInstrs) {
2326 int Idx = findCondCodeUseOperandIdxForBranchOrSelect(*CCUseInstr);
2327 assert(Idx >= 0 && "Unexpected instruction using CC.");
2328 MachineOperand &CCOperand = CCUseInstr->getOperand(Idx);
2329      AArch64CC::CondCode CCUse = AArch64CC::getInvertedCondCode(
2330          static_cast<AArch64CC::CondCode>(CCOperand.getImm()));
2331 CCOperand.setImm(CCUse);
2332 }
2333 }
2334 return true;
2335}
2336
2337bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
2338 if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD &&
2339 MI.getOpcode() != AArch64::CATCHRET)
2340 return false;
2341
2342 MachineBasicBlock &MBB = *MI.getParent();
2343 auto &Subtarget = MBB.getParent()->getSubtarget<AArch64Subtarget>();
2344 auto TRI = Subtarget.getRegisterInfo();
2345 DebugLoc DL = MI.getDebugLoc();
2346
2347 if (MI.getOpcode() == AArch64::CATCHRET) {
2348 // Skip to the first instruction before the epilog.
2349 const TargetInstrInfo *TII =
2351 MachineBasicBlock *TargetMBB = MI.getOperand(0).getMBB();
2353 MachineBasicBlock::iterator FirstEpilogSEH = std::prev(MBBI);
2354 while (FirstEpilogSEH->getFlag(MachineInstr::FrameDestroy) &&
2355 FirstEpilogSEH != MBB.begin())
2356 FirstEpilogSEH = std::prev(FirstEpilogSEH);
2357 if (FirstEpilogSEH != MBB.begin())
2358 FirstEpilogSEH = std::next(FirstEpilogSEH);
2359 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADRP))
2360 .addReg(AArch64::X0, RegState::Define)
2361 .addMBB(TargetMBB);
2362 BuildMI(MBB, FirstEpilogSEH, DL, TII->get(AArch64::ADDXri))
2363 .addReg(AArch64::X0, RegState::Define)
2364 .addReg(AArch64::X0)
2365 .addMBB(TargetMBB)
2366 .addImm(0);
2367 TargetMBB->setMachineBlockAddressTaken();
2368 return true;
2369 }
2370
2371 Register Reg = MI.getOperand(0).getReg();
2372  Module &M = *MBB.getParent()->getFunction().getParent();
2373  if (M.getStackProtectorGuard() == "sysreg") {
2374 const AArch64SysReg::SysReg *SrcReg =
2375 AArch64SysReg::lookupSysRegByName(M.getStackProtectorGuardReg());
2376 if (!SrcReg)
2377 report_fatal_error("Unknown SysReg for Stack Protector Guard Register");
2378
2379 // mrs xN, sysreg
2380 BuildMI(MBB, MI, DL, get(AArch64::MRS))
2382 .addImm(SrcReg->Encoding);
2383 int Offset = M.getStackProtectorGuardOffset();
2384 if (Offset >= 0 && Offset <= 32760 && Offset % 8 == 0) {
2385 // ldr xN, [xN, #offset]
2386 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2387 .addDef(Reg)
2389 .addImm(Offset / 8);
2390 } else if (Offset >= -256 && Offset <= 255) {
2391 // ldur xN, [xN, #offset]
2392 BuildMI(MBB, MI, DL, get(AArch64::LDURXi))
2393 .addDef(Reg)
2395 .addImm(Offset);
2396 } else if (Offset >= -4095 && Offset <= 4095) {
2397 if (Offset > 0) {
2398 // add xN, xN, #offset
2399 BuildMI(MBB, MI, DL, get(AArch64::ADDXri))
2400 .addDef(Reg)
2402 .addImm(Offset)
2403 .addImm(0);
2404 } else {
2405 // sub xN, xN, #offset
2406 BuildMI(MBB, MI, DL, get(AArch64::SUBXri))
2407 .addDef(Reg)
2409 .addImm(-Offset)
2410 .addImm(0);
2411 }
2412 // ldr xN, [xN]
2413 BuildMI(MBB, MI, DL, get(AArch64::LDRXui))
2414 .addDef(Reg)
2416 .addImm(0);
2417 } else {
2418      // Cases that are larger than +/- 4095 and not a multiple of 8, or larger
2419      // than 32760.
2420 // It might be nice to use AArch64::MOVi32imm here, which would get
2421 // expanded in PreSched2 after PostRA, but our lone scratch Reg already
2422 // contains the MRS result. findScratchNonCalleeSaveRegister() in
2423 // AArch64FrameLowering might help us find such a scratch register
2424 // though. If we failed to find a scratch register, we could emit a
2425 // stream of add instructions to build up the immediate. Or, we could try
2426 // to insert a AArch64::MOVi32imm before register allocation so that we
2427 // didn't need to scavenge for a scratch register.
2428 report_fatal_error("Unable to encode Stack Protector Guard Offset");
2429 }
2430 MBB.erase(MI);
2431 return true;
2432 }
2433
2434 const GlobalValue *GV =
2435 cast<GlobalValue>((*MI.memoperands_begin())->getValue());
2436 const TargetMachine &TM = MBB.getParent()->getTarget();
2437 unsigned OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
2438 const unsigned char MO_NC = AArch64II::MO_NC;
2439
2440 if ((OpFlags & AArch64II::MO_GOT) != 0) {
2441 BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
2442 .addGlobalAddress(GV, 0, OpFlags);
2443 if (Subtarget.isTargetILP32()) {
2444 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2445 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2446 .addDef(Reg32, RegState::Dead)
2448 .addImm(0)
2449 .addMemOperand(*MI.memoperands_begin())
2451 } else {
2452 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2454 .addImm(0)
2455 .addMemOperand(*MI.memoperands_begin());
2456 }
2457 } else if (TM.getCodeModel() == CodeModel::Large) {
2458 assert(!Subtarget.isTargetILP32() && "how can large exist in ILP32?");
2459 BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
2460 .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
2461 .addImm(0);
2462 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2464 .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
2465 .addImm(16);
2466 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2468 .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
2469 .addImm(32);
2470 BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
2473 .addImm(48);
2474 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2476 .addImm(0)
2477 .addMemOperand(*MI.memoperands_begin());
2478 } else if (TM.getCodeModel() == CodeModel::Tiny) {
2479 BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
2480 .addGlobalAddress(GV, 0, OpFlags);
2481 } else {
2482 BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
2483 .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
2484 unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
2485 if (Subtarget.isTargetILP32()) {
2486 unsigned Reg32 = TRI->getSubReg(Reg, AArch64::sub_32);
2487 BuildMI(MBB, MI, DL, get(AArch64::LDRWui))
2488 .addDef(Reg32, RegState::Dead)
2490 .addGlobalAddress(GV, 0, LoFlags)
2491 .addMemOperand(*MI.memoperands_begin())
2493 } else {
2494 BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
2496 .addGlobalAddress(GV, 0, LoFlags)
2497 .addMemOperand(*MI.memoperands_begin());
2498 }
2499 }
2500
2501 MBB.erase(MI);
2502
2503 return true;
2504}
2505
2506// Return true if this instruction simply sets its single destination register
2507// to zero. This is equivalent to a register rename of the zero-register.
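// For example (illustrative): "movz w0, #0", "and w0, wzr, #0xff", or a COPY
// from WZR all leave the destination equal to zero, matching the cases below.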
2509 switch (MI.getOpcode()) {
2510 default:
2511 break;
2512 case AArch64::MOVZWi:
2513 case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
2514 if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
2515 assert(MI.getDesc().getNumOperands() == 3 &&
2516 MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
2517 return true;
2518 }
2519 break;
2520 case AArch64::ANDWri: // and Rd, Rzr, #imm
2521 return MI.getOperand(1).getReg() == AArch64::WZR;
2522 case AArch64::ANDXri:
2523 return MI.getOperand(1).getReg() == AArch64::XZR;
2524 case TargetOpcode::COPY:
2525 return MI.getOperand(1).getReg() == AArch64::WZR;
2526 }
2527 return false;
2528}
2529
2530// Return true if this instruction simply renames a general register without
2531// modifying bits.
2533 switch (MI.getOpcode()) {
2534 default:
2535 break;
2536 case TargetOpcode::COPY: {
2537      // GPR32 copies will be lowered to ORRXrs
2538 Register DstReg = MI.getOperand(0).getReg();
2539 return (AArch64::GPR32RegClass.contains(DstReg) ||
2540 AArch64::GPR64RegClass.contains(DstReg));
2541 }
2542 case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
2543 if (MI.getOperand(1).getReg() == AArch64::XZR) {
2544 assert(MI.getDesc().getNumOperands() == 4 &&
2545 MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
2546 return true;
2547 }
2548 break;
2549 case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
2550 if (MI.getOperand(2).getImm() == 0) {
2551 assert(MI.getDesc().getNumOperands() == 4 &&
2552 MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
2553 return true;
2554 }
2555 break;
2556 }
2557 return false;
2558}
2559
2560// Return true if this instruction simply renames a floating-point/vector
2561// register without modifying bits.
2563 switch (MI.getOpcode()) {
2564 default:
2565 break;
2566 case TargetOpcode::COPY: {
2567 Register DstReg = MI.getOperand(0).getReg();
2568 return AArch64::FPR128RegClass.contains(DstReg);
2569 }
2570 case AArch64::ORRv16i8:
2571 if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
2572 assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
2573 "invalid ORRv16i8 operands");
2574 return true;
2575 }
2576 break;
2577 }
2578 return false;
2579}
2580
2581static bool isFrameLoadOpcode(int Opcode) {
2582 switch (Opcode) {
2583 default:
2584 return false;
2585 case AArch64::LDRWui:
2586 case AArch64::LDRXui:
2587 case AArch64::LDRBui:
2588 case AArch64::LDRHui:
2589 case AArch64::LDRSui:
2590 case AArch64::LDRDui:
2591 case AArch64::LDRQui:
2592 case AArch64::LDR_PXI:
2593 return true;
2594 }
2595}
2596
2598 int &FrameIndex) const {
2599 if (!isFrameLoadOpcode(MI.getOpcode()))
2600 return Register();
2601
2602 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2603 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2604 FrameIndex = MI.getOperand(1).getIndex();
2605 return MI.getOperand(0).getReg();
2606 }
2607 return Register();
2608}
2609
2610static bool isFrameStoreOpcode(int Opcode) {
2611 switch (Opcode) {
2612 default:
2613 return false;
2614 case AArch64::STRWui:
2615 case AArch64::STRXui:
2616 case AArch64::STRBui:
2617 case AArch64::STRHui:
2618 case AArch64::STRSui:
2619 case AArch64::STRDui:
2620 case AArch64::STRQui:
2621 case AArch64::STR_PXI:
2622 return true;
2623 }
2624}
2625
2627 int &FrameIndex) const {
2628 if (!isFrameStoreOpcode(MI.getOpcode()))
2629 return Register();
2630
2631 if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
2632 MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
2633 FrameIndex = MI.getOperand(1).getIndex();
2634 return MI.getOperand(0).getReg();
2635 }
2636 return Register();
2637}
2638
2640 int &FrameIndex) const {
2641 if (!isFrameStoreOpcode(MI.getOpcode()))
2642 return Register();
2643
2644 if (Register Reg = isStoreToStackSlot(MI, FrameIndex))
2645 return Reg;
2646
2648 if (hasStoreToStackSlot(MI, Accesses)) {
2649 if (Accesses.size() > 1)
2650 return Register();
2651
2652 FrameIndex =
2653 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
2654 ->getFrameIndex();
2655 return MI.getOperand(0).getReg();
2656 }
2657 return Register();
2658}
2659
2661 int &FrameIndex) const {
2662 if (!isFrameLoadOpcode(MI.getOpcode()))
2663 return Register();
2664
2665 if (Register Reg = isLoadFromStackSlot(MI, FrameIndex))
2666 return Reg;
2667
2669 if (hasLoadFromStackSlot(MI, Accesses)) {
2670 if (Accesses.size() > 1)
2671 return Register();
2672
2673 FrameIndex =
2674 cast<FixedStackPseudoSourceValue>(Accesses.front()->getPseudoValue())
2675 ->getFrameIndex();
2676 return MI.getOperand(0).getReg();
2677 }
2678 return Register();
2679}
2680
2681/// Check all MachineMemOperands for a hint to suppress pairing.
2683 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2684 return MMO->getFlags() & MOSuppressPair;
2685 });
2686}
2687
2688/// Set a flag on the first MachineMemOperand to suppress pairing.
2690 if (MI.memoperands_empty())
2691 return;
2692 (*MI.memoperands_begin())->setFlags(MOSuppressPair);
2693}
2694
2695/// Check all MachineMemOperands for a hint that the load/store is strided.
2697 return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
2698 return MMO->getFlags() & MOStridedAccess;
2699 });
2700}
2701
2703 switch (Opc) {
2704 default:
2705 return false;
2706 case AArch64::STURSi:
2707 case AArch64::STRSpre:
2708 case AArch64::STURDi:
2709 case AArch64::STRDpre:
2710 case AArch64::STURQi:
2711 case AArch64::STRQpre:
2712 case AArch64::STURBBi:
2713 case AArch64::STURHHi:
2714 case AArch64::STURWi:
2715 case AArch64::STRWpre:
2716 case AArch64::STURXi:
2717 case AArch64::STRXpre:
2718 case AArch64::LDURSi:
2719 case AArch64::LDRSpre:
2720 case AArch64::LDURDi:
2721 case AArch64::LDRDpre:
2722 case AArch64::LDURQi:
2723 case AArch64::LDRQpre:
2724 case AArch64::LDURWi:
2725 case AArch64::LDRWpre:
2726 case AArch64::LDURXi:
2727 case AArch64::LDRXpre:
2728 case AArch64::LDRSWpre:
2729 case AArch64::LDURSWi:
2730 case AArch64::LDURHHi:
2731 case AArch64::LDURBBi:
2732 case AArch64::LDURSBWi:
2733 case AArch64::LDURSHWi:
2734 return true;
2735 }
2736}
2737
2738std::optional<unsigned> AArch64InstrInfo::getUnscaledLdSt(unsigned Opc) {
2739 switch (Opc) {
2740 default: return {};
2741 case AArch64::PRFMui: return AArch64::PRFUMi;
2742 case AArch64::LDRXui: return AArch64::LDURXi;
2743 case AArch64::LDRWui: return AArch64::LDURWi;
2744 case AArch64::LDRBui: return AArch64::LDURBi;
2745 case AArch64::LDRHui: return AArch64::LDURHi;
2746 case AArch64::LDRSui: return AArch64::LDURSi;
2747 case AArch64::LDRDui: return AArch64::LDURDi;
2748 case AArch64::LDRQui: return AArch64::LDURQi;
2749 case AArch64::LDRBBui: return AArch64::LDURBBi;
2750 case AArch64::LDRHHui: return AArch64::LDURHHi;
2751 case AArch64::LDRSBXui: return AArch64::LDURSBXi;
2752 case AArch64::LDRSBWui: return AArch64::LDURSBWi;
2753 case AArch64::LDRSHXui: return AArch64::LDURSHXi;
2754 case AArch64::LDRSHWui: return AArch64::LDURSHWi;
2755 case AArch64::LDRSWui: return AArch64::LDURSWi;
2756 case AArch64::STRXui: return AArch64::STURXi;
2757 case AArch64::STRWui: return AArch64::STURWi;
2758 case AArch64::STRBui: return AArch64::STURBi;
2759 case AArch64::STRHui: return AArch64::STURHi;
2760 case AArch64::STRSui: return AArch64::STURSi;
2761 case AArch64::STRDui: return AArch64::STURDi;
2762 case AArch64::STRQui: return AArch64::STURQi;
2763 case AArch64::STRBBui: return AArch64::STURBBi;
2764 case AArch64::STRHHui: return AArch64::STURHHi;
2765 }
2766}
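// Illustrative sketch of the mapping above: the scaled form "ldr x0, [x1, #8]"
// (LDRXui, unsigned 12-bit immediate scaled by the access size) corresponds to
// the unscaled form "ldur x0, [x1, #8]" (LDURXi, signed 9-bit byte offset).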
2767
2769 switch (Opc) {
2770 default:
2771 llvm_unreachable("Unhandled Opcode in getLoadStoreImmIdx");
2772 case AArch64::ADDG:
2773 case AArch64::LDAPURBi:
2774 case AArch64::LDAPURHi:
2775 case AArch64::LDAPURi:
2776 case AArch64::LDAPURSBWi:
2777 case AArch64::LDAPURSBXi:
2778 case AArch64::LDAPURSHWi:
2779 case AArch64::LDAPURSHXi:
2780 case AArch64::LDAPURSWi:
2781 case AArch64::LDAPURXi:
2782 case AArch64::LDR_PPXI:
2783 case AArch64::LDR_PXI:
2784 case AArch64::LDR_ZXI:
2785 case AArch64::LDR_ZZXI:
2786 case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
2787 case AArch64::LDR_ZZZXI:
2788 case AArch64::LDR_ZZZZXI:
2789 case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
2790 case AArch64::LDRBBui:
2791 case AArch64::LDRBui:
2792 case AArch64::LDRDui:
2793 case AArch64::LDRHHui:
2794 case AArch64::LDRHui:
2795 case AArch64::LDRQui:
2796 case AArch64::LDRSBWui:
2797 case AArch64::LDRSBXui:
2798 case AArch64::LDRSHWui:
2799 case AArch64::LDRSHXui:
2800 case AArch64::LDRSui:
2801 case AArch64::LDRSWui:
2802 case AArch64::LDRWui:
2803 case AArch64::LDRXui:
2804 case AArch64::LDURBBi:
2805 case AArch64::LDURBi:
2806 case AArch64::LDURDi:
2807 case AArch64::LDURHHi:
2808 case AArch64::LDURHi:
2809 case AArch64::LDURQi:
2810 case AArch64::LDURSBWi:
2811 case AArch64::LDURSBXi:
2812 case AArch64::LDURSHWi:
2813 case AArch64::LDURSHXi:
2814 case AArch64::LDURSi:
2815 case AArch64::LDURSWi:
2816 case AArch64::LDURWi:
2817 case AArch64::LDURXi:
2818 case AArch64::PRFMui:
2819 case AArch64::PRFUMi:
2820 case AArch64::ST2Gi:
2821 case AArch64::STGi:
2822 case AArch64::STLURBi:
2823 case AArch64::STLURHi:
2824 case AArch64::STLURWi:
2825 case AArch64::STLURXi:
2826 case AArch64::StoreSwiftAsyncContext:
2827 case AArch64::STR_PPXI:
2828 case AArch64::STR_PXI:
2829 case AArch64::STR_ZXI:
2830 case AArch64::STR_ZZXI:
2831 case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
2832 case AArch64::STR_ZZZXI:
2833 case AArch64::STR_ZZZZXI:
2834 case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
2835 case AArch64::STRBBui:
2836 case AArch64::STRBui:
2837 case AArch64::STRDui:
2838 case AArch64::STRHHui:
2839 case AArch64::STRHui:
2840 case AArch64::STRQui:
2841 case AArch64::STRSui:
2842 case AArch64::STRWui:
2843 case AArch64::STRXui:
2844 case AArch64::STURBBi:
2845 case AArch64::STURBi:
2846 case AArch64::STURDi:
2847 case AArch64::STURHHi:
2848 case AArch64::STURHi:
2849 case AArch64::STURQi:
2850 case AArch64::STURSi:
2851 case AArch64::STURWi:
2852 case AArch64::STURXi:
2853 case AArch64::STZ2Gi:
2854 case AArch64::STZGi:
2855 case AArch64::TAGPstack:
2856 return 2;
2857 case AArch64::LD1B_D_IMM:
2858 case AArch64::LD1B_H_IMM:
2859 case AArch64::LD1B_IMM:
2860 case AArch64::LD1B_S_IMM:
2861 case AArch64::LD1D_IMM:
2862 case AArch64::LD1H_D_IMM:
2863 case AArch64::LD1H_IMM:
2864 case AArch64::LD1H_S_IMM:
2865 case AArch64::LD1RB_D_IMM:
2866 case AArch64::LD1RB_H_IMM:
2867 case AArch64::LD1RB_IMM:
2868 case AArch64::LD1RB_S_IMM:
2869 case AArch64::LD1RD_IMM:
2870 case AArch64::LD1RH_D_IMM:
2871 case AArch64::LD1RH_IMM:
2872 case AArch64::LD1RH_S_IMM:
2873 case AArch64::LD1RSB_D_IMM:
2874 case AArch64::LD1RSB_H_IMM:
2875 case AArch64::LD1RSB_S_IMM:
2876 case AArch64::LD1RSH_D_IMM:
2877 case AArch64::LD1RSH_S_IMM:
2878 case AArch64::LD1RSW_IMM:
2879 case AArch64::LD1RW_D_IMM:
2880 case AArch64::LD1RW_IMM:
2881 case AArch64::LD1SB_D_IMM:
2882 case AArch64::LD1SB_H_IMM:
2883 case AArch64::LD1SB_S_IMM:
2884 case AArch64::LD1SH_D_IMM:
2885 case AArch64::LD1SH_S_IMM:
2886 case AArch64::LD1SW_D_IMM:
2887 case AArch64::LD1W_D_IMM:
2888 case AArch64::LD1W_IMM:
2889 case AArch64::LD2B_IMM:
2890 case AArch64::LD2D_IMM:
2891 case AArch64::LD2H_IMM:
2892 case AArch64::LD2W_IMM:
2893 case AArch64::LD3B_IMM:
2894 case AArch64::LD3D_IMM:
2895 case AArch64::LD3H_IMM:
2896 case AArch64::LD3W_IMM:
2897 case AArch64::LD4B_IMM:
2898 case AArch64::LD4D_IMM:
2899 case AArch64::LD4H_IMM:
2900 case AArch64::LD4W_IMM:
2901 case AArch64::LDG:
2902 case AArch64::LDNF1B_D_IMM:
2903 case AArch64::LDNF1B_H_IMM:
2904 case AArch64::LDNF1B_IMM:
2905 case AArch64::LDNF1B_S_IMM:
2906 case AArch64::LDNF1D_IMM:
2907 case AArch64::LDNF1H_D_IMM:
2908 case AArch64::LDNF1H_IMM:
2909 case AArch64::LDNF1H_S_IMM:
2910 case AArch64::LDNF1SB_D_IMM:
2911 case AArch64::LDNF1SB_H_IMM:
2912 case AArch64::LDNF1SB_S_IMM:
2913 case AArch64::LDNF1SH_D_IMM:
2914 case AArch64::LDNF1SH_S_IMM:
2915 case AArch64::LDNF1SW_D_IMM:
2916 case AArch64::LDNF1W_D_IMM:
2917 case AArch64::LDNF1W_IMM:
2918 case AArch64::LDNPDi:
2919 case AArch64::LDNPQi:
2920 case AArch64::LDNPSi:
2921 case AArch64::LDNPWi:
2922 case AArch64::LDNPXi:
2923 case AArch64::LDNT1B_ZRI:
2924 case AArch64::LDNT1D_ZRI:
2925 case AArch64::LDNT1H_ZRI:
2926 case AArch64::LDNT1W_ZRI:
2927 case AArch64::LDPDi:
2928 case AArch64::LDPQi:
2929 case AArch64::LDPSi:
2930 case AArch64::LDPWi:
2931 case AArch64::LDPXi:
2932 case AArch64::LDRBBpost:
2933 case AArch64::LDRBBpre:
2934 case AArch64::LDRBpost:
2935 case AArch64::LDRBpre:
2936 case AArch64::LDRDpost:
2937 case AArch64::LDRDpre:
2938 case AArch64::LDRHHpost:
2939 case AArch64::LDRHHpre:
2940 case AArch64::LDRHpost:
2941 case AArch64::LDRHpre:
2942 case AArch64::LDRQpost:
2943 case AArch64::LDRQpre:
2944 case AArch64::LDRSpost:
2945 case AArch64::LDRSpre:
2946 case AArch64::LDRWpost:
2947 case AArch64::LDRWpre:
2948 case AArch64::LDRXpost:
2949 case AArch64::LDRXpre:
2950 case AArch64::ST1B_D_IMM:
2951 case AArch64::ST1B_H_IMM:
2952 case AArch64::ST1B_IMM:
2953 case AArch64::ST1B_S_IMM:
2954 case AArch64::ST1D_IMM:
2955 case AArch64::ST1H_D_IMM:
2956 case AArch64::ST1H_IMM:
2957 case AArch64::ST1H_S_IMM:
2958 case AArch64::ST1W_D_IMM:
2959 case AArch64::ST1W_IMM:
2960 case AArch64::ST2B_IMM:
2961 case AArch64::ST2D_IMM:
2962 case AArch64::ST2H_IMM:
2963 case AArch64::ST2W_IMM:
2964 case AArch64::ST3B_IMM:
2965 case AArch64::ST3D_IMM:
2966 case AArch64::ST3H_IMM:
2967 case AArch64::ST3W_IMM:
2968 case AArch64::ST4B_IMM:
2969 case AArch64::ST4D_IMM:
2970 case AArch64::ST4H_IMM:
2971 case AArch64::ST4W_IMM:
2972 case AArch64::STGPi:
2973 case AArch64::STGPreIndex:
2974 case AArch64::STZGPreIndex:
2975 case AArch64::ST2GPreIndex:
2976 case AArch64::STZ2GPreIndex:
2977 case AArch64::STGPostIndex:
2978 case AArch64::STZGPostIndex:
2979 case AArch64::ST2GPostIndex:
2980 case AArch64::STZ2GPostIndex:
2981 case AArch64::STNPDi:
2982 case AArch64::STNPQi:
2983 case AArch64::STNPSi:
2984 case AArch64::STNPWi:
2985 case AArch64::STNPXi:
2986 case AArch64::STNT1B_ZRI:
2987 case AArch64::STNT1D_ZRI:
2988 case AArch64::STNT1H_ZRI:
2989 case AArch64::STNT1W_ZRI:
2990 case AArch64::STPDi:
2991 case AArch64::STPQi:
2992 case AArch64::STPSi:
2993 case AArch64::STPWi:
2994 case AArch64::STPXi:
2995 case AArch64::STRBBpost:
2996 case AArch64::STRBBpre:
2997 case AArch64::STRBpost:
2998 case AArch64::STRBpre:
2999 case AArch64::STRDpost:
3000 case AArch64::STRDpre:
3001 case AArch64::STRHHpost:
3002 case AArch64::STRHHpre:
3003 case AArch64::STRHpost:
3004 case AArch64::STRHpre:
3005 case AArch64::STRQpost:
3006 case AArch64::STRQpre:
3007 case AArch64::STRSpost:
3008 case AArch64::STRSpre:
3009 case AArch64::STRWpost:
3010 case AArch64::STRWpre:
3011 case AArch64::STRXpost:
3012 case AArch64::STRXpre:
3013 return 3;
3014 case AArch64::LDPDpost:
3015 case AArch64::LDPDpre:
3016 case AArch64::LDPQpost:
3017 case AArch64::LDPQpre:
3018 case AArch64::LDPSpost:
3019 case AArch64::LDPSpre:
3020 case AArch64::LDPWpost:
3021 case AArch64::LDPWpre:
3022 case AArch64::LDPXpost:
3023 case AArch64::LDPXpre:
3024 case AArch64::STGPpre:
3025 case AArch64::STGPpost:
3026 case AArch64::STPDpost:
3027 case AArch64::STPDpre:
3028 case AArch64::STPQpost:
3029 case AArch64::STPQpre:
3030 case AArch64::STPSpost:
3031 case AArch64::STPSpre:
3032 case AArch64::STPWpost:
3033 case AArch64::STPWpre:
3034 case AArch64::STPXpost:
3035 case AArch64::STPXpre:
3036 return 4;
3037 }
3038}
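// Illustrative reading of the returned indices (sketch, operand order taken
// from the cases above): for "ldr x0, [x1, #imm]" (LDRXui) the immediate is
// operand 2; for "ldp x0, x1, [x2, #imm]" (LDPXi) it is operand 3; and for the
// writeback form "ldp x0, x1, [x2, #imm]!" (LDPXpre) it is operand 4.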
3039
3041 switch (MI.getOpcode()) {
3042 default:
3043 return false;
3044 // Scaled instructions.
3045 case AArch64::STRSui:
3046 case AArch64::STRDui:
3047 case AArch64::STRQui:
3048 case AArch64::STRXui:
3049 case AArch64::STRWui:
3050 case AArch64::LDRSui:
3051 case AArch64::LDRDui:
3052 case AArch64::LDRQui:
3053 case AArch64::LDRXui:
3054 case AArch64::LDRWui:
3055 case AArch64::LDRSWui:
3056 // Unscaled instructions.
3057 case AArch64::STURSi:
3058 case AArch64::STRSpre:
3059 case AArch64::STURDi:
3060 case AArch64::STRDpre:
3061 case AArch64::STURQi:
3062 case AArch64::STRQpre:
3063 case AArch64::STURWi:
3064 case AArch64::STRWpre:
3065 case AArch64::STURXi:
3066 case AArch64::STRXpre:
3067 case AArch64::LDURSi:
3068 case AArch64::LDRSpre:
3069 case AArch64::LDURDi:
3070 case AArch64::LDRDpre:
3071 case AArch64::LDURQi:
3072 case AArch64::LDRQpre:
3073 case AArch64::LDURWi:
3074 case AArch64::LDRWpre:
3075 case AArch64::LDURXi:
3076 case AArch64::LDRXpre:
3077 case AArch64::LDURSWi:
3078 case AArch64::LDRSWpre:
3079 // SVE instructions.
3080 case AArch64::LDR_ZXI:
3081 case AArch64::STR_ZXI:
3082 return true;
3083 }
3084}
3085
3087 switch (MI.getOpcode()) {
3088 default:
3089 assert((!MI.isCall() || !MI.isReturn()) &&
3090 "Unexpected instruction - was a new tail call opcode introduced?");
3091 return false;
3092 case AArch64::TCRETURNdi:
3093 case AArch64::TCRETURNri:
3094 case AArch64::TCRETURNrix16x17:
3095 case AArch64::TCRETURNrix17:
3096 case AArch64::TCRETURNrinotx16:
3097 case AArch64::TCRETURNriALL:
3098 case AArch64::AUTH_TCRETURN:
3099 case AArch64::AUTH_TCRETURN_BTI:
3100 return true;
3101 }
3102}
3103
3105 switch (Opc) {
3106 default:
3107 llvm_unreachable("Opcode has no flag setting equivalent!");
3108 // 32-bit cases:
3109 case AArch64::ADDWri:
3110 return AArch64::ADDSWri;
3111 case AArch64::ADDWrr:
3112 return AArch64::ADDSWrr;
3113 case AArch64::ADDWrs:
3114 return AArch64::ADDSWrs;
3115 case AArch64::ADDWrx:
3116 return AArch64::ADDSWrx;
3117 case AArch64::ANDWri:
3118 return AArch64::ANDSWri;
3119 case AArch64::ANDWrr:
3120 return AArch64::ANDSWrr;
3121 case AArch64::ANDWrs:
3122 return AArch64::ANDSWrs;
3123 case AArch64::BICWrr:
3124 return AArch64::BICSWrr;
3125 case AArch64::BICWrs:
3126 return AArch64::BICSWrs;
3127 case AArch64::SUBWri:
3128 return AArch64::SUBSWri;
3129 case AArch64::SUBWrr:
3130 return AArch64::SUBSWrr;
3131 case AArch64::SUBWrs:
3132 return AArch64::SUBSWrs;
3133 case AArch64::SUBWrx:
3134 return AArch64::SUBSWrx;
3135 // 64-bit cases:
3136 case AArch64::ADDXri:
3137 return AArch64::ADDSXri;
3138 case AArch64::ADDXrr:
3139 return AArch64::ADDSXrr;
3140 case AArch64::ADDXrs:
3141 return AArch64::ADDSXrs;
3142 case AArch64::ADDXrx:
3143 return AArch64::ADDSXrx;
3144 case AArch64::ANDXri:
3145 return AArch64::ANDSXri;
3146 case AArch64::ANDXrr:
3147 return AArch64::ANDSXrr;
3148 case AArch64::ANDXrs:
3149 return AArch64::ANDSXrs;
3150 case AArch64::BICXrr:
3151 return AArch64::BICSXrr;
3152 case AArch64::BICXrs:
3153 return AArch64::BICSXrs;
3154 case AArch64::SUBXri:
3155 return AArch64::SUBSXri;
3156 case AArch64::SUBXrr:
3157 return AArch64::SUBSXrr;
3158 case AArch64::SUBXrs:
3159 return AArch64::SUBSXrs;
3160 case AArch64::SUBXrx:
3161 return AArch64::SUBSXrx;
3162 // SVE instructions:
3163 case AArch64::AND_PPzPP:
3164 return AArch64::ANDS_PPzPP;
3165 case AArch64::BIC_PPzPP:
3166 return AArch64::BICS_PPzPP;
3167 case AArch64::EOR_PPzPP:
3168 return AArch64::EORS_PPzPP;
3169 case AArch64::NAND_PPzPP:
3170 return AArch64::NANDS_PPzPP;
3171 case AArch64::NOR_PPzPP:
3172 return AArch64::NORS_PPzPP;
3173 case AArch64::ORN_PPzPP:
3174 return AArch64::ORNS_PPzPP;
3175 case AArch64::ORR_PPzPP:
3176 return AArch64::ORRS_PPzPP;
3177 case AArch64::BRKA_PPzP:
3178 return AArch64::BRKAS_PPzP;
3179 case AArch64::BRKPA_PPzPP:
3180 return AArch64::BRKPAS_PPzPP;
3181 case AArch64::BRKB_PPzP:
3182 return AArch64::BRKBS_PPzP;
3183 case AArch64::BRKPB_PPzPP:
3184 return AArch64::BRKPBS_PPzPP;
3185 case AArch64::BRKN_PPzP:
3186 return AArch64::BRKNS_PPzP;
3187 case AArch64::RDFFR_PPz:
3188 return AArch64::RDFFRS_PPz;
3189 case AArch64::PTRUE_B:
3190 return AArch64::PTRUES_B;
3191 }
3192}
3193
3194// Is this a candidate for ld/st merging or pairing? For example, we don't
3195// touch volatiles or load/stores that have a hint to avoid pair formation.
3196bool AArch64InstrInfo::isCandidateToMergeOrPair(const MachineInstr &MI) const {
3197
3198 bool IsPreLdSt = isPreLdSt(MI);
3199
3200 // If this is a volatile load/store, don't mess with it.
3201 if (MI.hasOrderedMemoryRef())
3202 return false;
3203
3204 // Make sure this is a reg/fi+imm (as opposed to an address reloc).
3205 // For Pre-inc LD/ST, the operand is shifted by one.
3206 assert((MI.getOperand(IsPreLdSt ? 2 : 1).isReg() ||
3207 MI.getOperand(IsPreLdSt ? 2 : 1).isFI()) &&
3208 "Expected a reg or frame index operand.");
3209
3210 // For Pre-indexed addressing quadword instructions, the third operand is the
3211 // immediate value.
3212 bool IsImmPreLdSt = IsPreLdSt && MI.getOperand(3).isImm();
3213
3214 if (!MI.getOperand(2).isImm() && !IsImmPreLdSt)
3215 return false;
3216
3217 // Can't merge/pair if the instruction modifies the base register.
3218 // e.g., ldr x0, [x0]
3219 // This case will never occur with an FI base.
3220 // However, if the instruction is an LDR<S,D,Q,W,X,SW>pre or
3221 // STR<S,D,Q,W,X>pre, it can be merged.
3222 // For example:
3223 // ldr q0, [x11, #32]!
3224 // ldr q1, [x11, #16]
3225 // to
3226 // ldp q0, q1, [x11, #32]!
3227 if (MI.getOperand(1).isReg() && !IsPreLdSt) {
3228 Register BaseReg = MI.getOperand(1).getReg();
3229    const TargetRegisterInfo *TRI = &getRegisterInfo();
3230    if (MI.modifiesRegister(BaseReg, TRI))
3231 return false;
3232 }
3233
3234  // Pairing SVE fills/spills is only valid for little-endian targets whose
3235  // SVE vector length is known to be exactly 128 bits (VLS 128).
3236 switch (MI.getOpcode()) {
3237 default:
3238 break;
3239 case AArch64::LDR_ZXI:
3240 case AArch64::STR_ZXI:
3241 if (!Subtarget.isLittleEndian() ||
3242 Subtarget.getSVEVectorSizeInBits() != 128)
3243 return false;
3244 }
3245
3246 // Check if this load/store has a hint to avoid pair formation.
3247 // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
3248  if (isLdStPairSuppressed(MI))
3249    return false;
3250
3251 // Do not pair any callee-save store/reload instructions in the
3252 // prologue/epilogue if the CFI information encoded the operations as separate
3253 // instructions, as that will cause the size of the actual prologue to mismatch
3254 // with the prologue size recorded in the Windows CFI.
3255 const MCAsmInfo *MAI = MI.getMF()->getTarget().getMCAsmInfo();
3256 bool NeedsWinCFI = MAI->usesWindowsCFI() &&
3257 MI.getMF()->getFunction().needsUnwindTableEntry();
3258 if (NeedsWinCFI && (MI.getFlag(MachineInstr::FrameSetup) ||
3259                      MI.getFlag(MachineInstr::FrameDestroy)))
3260    return false;
3261
3262 // On some CPUs quad load/store pairs are slower than two single load/stores.
3263 if (Subtarget.isPaired128Slow()) {
3264 switch (MI.getOpcode()) {
3265 default:
3266 break;
3267 case AArch64::LDURQi:
3268 case AArch64::STURQi:
3269 case AArch64::LDRQui:
3270 case AArch64::STRQui:
3271 return false;
3272 }
3273 }
3274
3275 return true;
3276}
3277
3280 int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width,
3281 const TargetRegisterInfo *TRI) const {
3282 if (!LdSt.mayLoadOrStore())
3283 return false;
3284
3285 const MachineOperand *BaseOp;
3286 TypeSize WidthN(0, false);
3287 if (!getMemOperandWithOffsetWidth(LdSt, BaseOp, Offset, OffsetIsScalable,
3288 WidthN, TRI))
3289 return false;
3290  // The maximum vscale is 16 under AArch64; return the maximal extent for the
3291  // vector.
3292 Width = LocationSize::precise(WidthN);
3293 BaseOps.push_back(BaseOp);
3294 return true;
3295}
3296
3297std::optional<ExtAddrMode>
3299 const TargetRegisterInfo *TRI) const {
3300 const MachineOperand *Base; // Filled with the base operand of MI.
3301 int64_t Offset; // Filled with the offset of MI.
3302 bool OffsetIsScalable;
3303 if (!getMemOperandWithOffset(MemI, Base, Offset, OffsetIsScalable, TRI))
3304 return std::nullopt;
3305
3306 if (!Base->isReg())
3307 return std::nullopt;
3308 ExtAddrMode AM;
3309 AM.BaseReg = Base->getReg();
3310 AM.Displacement = Offset;
3311 AM.ScaledReg = 0;
3312 AM.Scale = 0;
3313 return AM;
3314}
3315
3317 Register Reg,
3318 const MachineInstr &AddrI,
3319 ExtAddrMode &AM) const {
3320 // Filter out instructions into which we cannot fold.
3321 unsigned NumBytes;
3322 int64_t OffsetScale = 1;
3323 switch (MemI.getOpcode()) {
3324 default:
3325 return false;
3326
3327 case AArch64::LDURQi:
3328 case AArch64::STURQi:
3329 NumBytes = 16;
3330 break;
3331
3332 case AArch64::LDURDi:
3333 case AArch64::STURDi:
3334 case AArch64::LDURXi:
3335 case AArch64::STURXi:
3336 NumBytes = 8;
3337 break;
3338
3339 case AArch64::LDURWi:
3340 case AArch64::LDURSWi:
3341 case AArch64::STURWi:
3342 NumBytes = 4;
3343 break;
3344
3345 case AArch64::LDURHi:
3346 case AArch64::STURHi:
3347 case AArch64::LDURHHi:
3348 case AArch64::STURHHi:
3349 case AArch64::LDURSHXi:
3350 case AArch64::LDURSHWi:
3351 NumBytes = 2;
3352 break;
3353
3354 case AArch64::LDRBroX:
3355 case AArch64::LDRBBroX:
3356 case AArch64::LDRSBXroX:
3357 case AArch64::LDRSBWroX:
3358 case AArch64::STRBroX:
3359 case AArch64::STRBBroX:
3360 case AArch64::LDURBi:
3361 case AArch64::LDURBBi:
3362 case AArch64::LDURSBXi:
3363 case AArch64::LDURSBWi:
3364 case AArch64::STURBi:
3365 case AArch64::STURBBi:
3366 case AArch64::LDRBui:
3367 case AArch64::LDRBBui:
3368 case AArch64::LDRSBXui:
3369 case AArch64::LDRSBWui:
3370 case AArch64::STRBui:
3371 case AArch64::STRBBui:
3372 NumBytes = 1;
3373 break;
3374
3375 case AArch64::LDRQroX:
3376 case AArch64::STRQroX:
3377 case AArch64::LDRQui:
3378 case AArch64::STRQui:
3379 NumBytes = 16;
3380 OffsetScale = 16;
3381 break;
3382
3383 case AArch64::LDRDroX:
3384 case AArch64::STRDroX:
3385 case AArch64::LDRXroX:
3386 case AArch64::STRXroX:
3387 case AArch64::LDRDui:
3388 case AArch64::STRDui:
3389 case AArch64::LDRXui:
3390 case AArch64::STRXui:
3391 NumBytes = 8;
3392 OffsetScale = 8;
3393 break;
3394
3395 case AArch64::LDRWroX:
3396 case AArch64::LDRSWroX:
3397 case AArch64::STRWroX:
3398 case AArch64::LDRWui:
3399 case AArch64::LDRSWui:
3400 case AArch64::STRWui:
3401 NumBytes = 4;
3402 OffsetScale = 4;
3403 break;
3404
3405 case AArch64::LDRHroX:
3406 case AArch64::STRHroX:
3407 case AArch64::LDRHHroX:
3408 case AArch64::STRHHroX:
3409 case AArch64::LDRSHXroX:
3410 case AArch64::LDRSHWroX:
3411 case AArch64::LDRHui:
3412 case AArch64::STRHui:
3413 case AArch64::LDRHHui:
3414 case AArch64::STRHHui:
3415 case AArch64::LDRSHXui:
3416 case AArch64::LDRSHWui:
3417 NumBytes = 2;
3418 OffsetScale = 2;
3419 break;
3420 }
3421
3422 // Check the fold operand is not the loaded/stored value.
3423 const MachineOperand &BaseRegOp = MemI.getOperand(0);
3424 if (BaseRegOp.isReg() && BaseRegOp.getReg() == Reg)
3425 return false;
3426
3427 // Handle memory instructions with a [Reg, Reg] addressing mode.
3428 if (MemI.getOperand(2).isReg()) {
3429 // Bail if the addressing mode already includes extension of the offset
3430 // register.
3431 if (MemI.getOperand(3).getImm())
3432 return false;
3433
3434 // Check if we actually have a scaled offset.
3435 if (MemI.getOperand(4).getImm() == 0)
3436 OffsetScale = 1;
3437
3438    // If the address instruction is folded into the base register, then the
3439    // addressing mode must not have a scale, so that we can simply swap the
3440    // base and the scaled registers.
3441 if (MemI.getOperand(1).getReg() == Reg && OffsetScale != 1)
3442 return false;
3443
3444 switch (AddrI.getOpcode()) {
3445 default:
3446 return false;
3447
3448 case AArch64::SBFMXri:
3449 // sxtw Xa, Wm
3450 // ldr Xd, [Xn, Xa, lsl #N]
3451 // ->
3452 // ldr Xd, [Xn, Wm, sxtw #N]
3453 if (AddrI.getOperand(2).getImm() != 0 ||
3454 AddrI.getOperand(3).getImm() != 31)
3455 return false;
3456
3457 AM.BaseReg = MemI.getOperand(1).getReg();
3458 if (AM.BaseReg == Reg)
3459 AM.BaseReg = MemI.getOperand(2).getReg();
3460 AM.ScaledReg = AddrI.getOperand(1).getReg();
3461 AM.Scale = OffsetScale;
3462 AM.Displacement = 0;
3464 return true;
3465
3466 case TargetOpcode::SUBREG_TO_REG: {
3467 // mov Wa, Wm
3468 // ldr Xd, [Xn, Xa, lsl #N]
3469 // ->
3470 // ldr Xd, [Xn, Wm, uxtw #N]
3471
3472 // Zero-extension looks like an ORRWrs followed by a SUBREG_TO_REG.
3473 if (AddrI.getOperand(1).getImm() != 0 ||
3474 AddrI.getOperand(3).getImm() != AArch64::sub_32)
3475 return false;
3476
3477 const MachineRegisterInfo &MRI = AddrI.getMF()->getRegInfo();
3478 Register OffsetReg = AddrI.getOperand(2).getReg();
3479 if (!OffsetReg.isVirtual() || !MRI.hasOneNonDBGUse(OffsetReg))
3480 return false;
3481
3482 const MachineInstr &DefMI = *MRI.getVRegDef(OffsetReg);
3483 if (DefMI.getOpcode() != AArch64::ORRWrs ||
3484 DefMI.getOperand(1).getReg() != AArch64::WZR ||
3485 DefMI.getOperand(3).getImm() != 0)
3486 return false;
3487
3488 AM.BaseReg = MemI.getOperand(1).getReg();
3489 if (AM.BaseReg == Reg)
3490 AM.BaseReg = MemI.getOperand(2).getReg();
3491 AM.ScaledReg = DefMI.getOperand(2).getReg();
3492 AM.Scale = OffsetScale;
3493 AM.Displacement = 0;
3495 return true;
3496 }
3497 }
3498 }
3499
3500 // Handle memory instructions with a [Reg, #Imm] addressing mode.
3501
3502 // Check we are not breaking a potential conversion to an LDP.
3503 auto validateOffsetForLDP = [](unsigned NumBytes, int64_t OldOffset,
3504 int64_t NewOffset) -> bool {
3505 int64_t MinOffset, MaxOffset;
3506 switch (NumBytes) {
3507 default:
3508 return true;
3509 case 4:
3510 MinOffset = -256;
3511 MaxOffset = 252;
3512 break;
3513 case 8:
3514 MinOffset = -512;
3515 MaxOffset = 504;
3516 break;
3517 case 16:
3518 MinOffset = -1024;
3519 MaxOffset = 1008;
3520 break;
3521 }
3522 return OldOffset < MinOffset || OldOffset > MaxOffset ||
3523 (NewOffset >= MinOffset && NewOffset <= MaxOffset);
3524 };
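  // Illustrative arithmetic behind the bounds above: an LDP immediate is a
  // signed 7-bit multiple of the access size, so for 8-byte accesses the legal
  // range is [-64 * 8, 63 * 8] = [-512, 504]; 4- and 16-byte accesses scale to
  // [-256, 252] and [-1024, 1008] respectively.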
3525 auto canFoldAddSubImmIntoAddrMode = [&](int64_t Disp) -> bool {
3526 int64_t OldOffset = MemI.getOperand(2).getImm() * OffsetScale;
3527 int64_t NewOffset = OldOffset + Disp;
3528 if (!isLegalAddressingMode(NumBytes, NewOffset, /* Scale */ 0))
3529 return false;
3530 // If the old offset would fit into an LDP, but the new offset wouldn't,
3531 // bail out.
3532 if (!validateOffsetForLDP(NumBytes, OldOffset, NewOffset))
3533 return false;
3534 AM.BaseReg = AddrI.getOperand(1).getReg();
3535 AM.ScaledReg = 0;
3536 AM.Scale = 0;
3537 AM.Displacement = NewOffset;
3539 return true;
3540 };
3541
3542 auto canFoldAddRegIntoAddrMode =
3543 [&](int64_t Scale,
3545 if (MemI.getOperand(2).getImm() != 0)
3546 return false;
3547 if ((unsigned)Scale != Scale)
3548 return false;
3549 if (!isLegalAddressingMode(NumBytes, /* Offset */ 0, Scale))
3550 return false;
3551 AM.BaseReg = AddrI.getOperand(1).getReg();
3552 AM.ScaledReg = AddrI.getOperand(2).getReg();
3553 AM.Scale = Scale;
3554 AM.Displacement = 0;
3555 AM.Form = Form;
3556 return true;
3557 };
3558
3559 auto avoidSlowSTRQ = [&](const MachineInstr &MemI) {
3560 unsigned Opcode = MemI.getOpcode();
3561 return (Opcode == AArch64::STURQi || Opcode == AArch64::STRQui) &&
3562 Subtarget.isSTRQroSlow();
3563 };
3564
3565 int64_t Disp = 0;
3566 const bool OptSize = MemI.getMF()->getFunction().hasOptSize();
3567 switch (AddrI.getOpcode()) {
3568 default:
3569 return false;
3570
3571 case AArch64::ADDXri:
3572 // add Xa, Xn, #N
3573 // ldr Xd, [Xa, #M]
3574 // ->
3575 // ldr Xd, [Xn, #N'+M]
3576 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3577 return canFoldAddSubImmIntoAddrMode(Disp);
3578
3579 case AArch64::SUBXri:
3580 // sub Xa, Xn, #N
3581 // ldr Xd, [Xa, #M]
3582 // ->
3583 // ldr Xd, [Xn, #N'+M]
3584 Disp = AddrI.getOperand(2).getImm() << AddrI.getOperand(3).getImm();
3585 return canFoldAddSubImmIntoAddrMode(-Disp);
3586
3587 case AArch64::ADDXrs: {
3588 // add Xa, Xn, Xm, lsl #N
3589 // ldr Xd, [Xa]
3590 // ->
3591 // ldr Xd, [Xn, Xm, lsl #N]
3592
3593 // Don't fold the add if the result would be slower, unless optimising for
3594 // size.
3595 unsigned Shift = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3596    if (AArch64_AM::getShiftType(Shift) != AArch64_AM::LSL)
3597      return false;
3598 Shift = AArch64_AM::getShiftValue(Shift);
3599 if (!OptSize) {
3600 if (Shift != 2 && Shift != 3 && Subtarget.hasAddrLSLSlow14())
3601 return false;
3602 if (avoidSlowSTRQ(MemI))
3603 return false;
3604 }
3605 return canFoldAddRegIntoAddrMode(1ULL << Shift);
3606 }
3607
3608 case AArch64::ADDXrr:
3609 // add Xa, Xn, Xm
3610 // ldr Xd, [Xa]
3611 // ->
3612 // ldr Xd, [Xn, Xm, lsl #0]
3613
3614 // Don't fold the add if the result would be slower, unless optimising for
3615 // size.
3616 if (!OptSize && avoidSlowSTRQ(MemI))
3617 return false;
3618 return canFoldAddRegIntoAddrMode(1);
3619
3620 case AArch64::ADDXrx:
3621 // add Xa, Xn, Wm, {s,u}xtw #N
3622 // ldr Xd, [Xa]
3623 // ->
3624 // ldr Xd, [Xn, Wm, {s,u}xtw #N]
3625
3626 // Don't fold the add if the result would be slower, unless optimising for
3627 // size.
3628 if (!OptSize && avoidSlowSTRQ(MemI))
3629 return false;
3630
3631 // Can fold only sign-/zero-extend of a word.
3632 unsigned Imm = static_cast<unsigned>(AddrI.getOperand(3).getImm());
3634 if (Extend != AArch64_AM::UXTW && Extend != AArch64_AM::SXTW)
3635 return false;
3636
3637 return canFoldAddRegIntoAddrMode(
3638 1ULL << AArch64_AM::getArithShiftValue(Imm),
3641 }
3642}
3643
3644// Given an opcode for an instruction with a [Reg, #Imm] addressing mode,
3645// return the opcode of an instruction performing the same operation, but using
3646// the [Reg, Reg] addressing mode.
3647static unsigned regOffsetOpcode(unsigned Opcode) {
3648 switch (Opcode) {
3649 default:
3650 llvm_unreachable("Address folding not implemented for instruction");
3651
3652 case AArch64::LDURQi:
3653 case AArch64::LDRQui:
3654 return AArch64::LDRQroX;
3655 case AArch64::STURQi:
3656 case AArch64::STRQui:
3657 return AArch64::STRQroX;
3658 case AArch64::LDURDi:
3659 case AArch64::LDRDui:
3660 return AArch64::LDRDroX;
3661 case AArch64::STURDi:
3662 case AArch64::STRDui:
3663 return AArch64::STRDroX;
3664 case AArch64::LDURXi:
3665 case AArch64::LDRXui:
3666 return AArch64::LDRXroX;
3667 case AArch64::STURXi:
3668 case AArch64::STRXui:
3669 return AArch64::STRXroX;
3670 case AArch64::LDURWi:
3671 case AArch64::LDRWui:
3672 return AArch64::LDRWroX;
3673 case AArch64::LDURSWi:
3674 case AArch64::LDRSWui:
3675 return AArch64::LDRSWroX;
3676 case AArch64::STURWi:
3677 case AArch64::STRWui:
3678 return AArch64::STRWroX;
3679 case AArch64::LDURHi:
3680 case AArch64::LDRHui:
3681 return AArch64::LDRHroX;
3682 case AArch64::STURHi:
3683 case AArch64::STRHui:
3684 return AArch64::STRHroX;
3685 case AArch64::LDURHHi:
3686 case AArch64::LDRHHui:
3687 return AArch64::LDRHHroX;
3688 case AArch64::STURHHi:
3689 case AArch64::STRHHui:
3690 return AArch64::STRHHroX;
3691 case AArch64::LDURSHXi:
3692 case AArch64::LDRSHXui:
3693 return AArch64::LDRSHXroX;
3694 case AArch64::LDURSHWi:
3695 case AArch64::LDRSHWui:
3696 return AArch64::LDRSHWroX;
3697 case AArch64::LDURBi:
3698 case AArch64::LDRBui:
3699 return AArch64::LDRBroX;
3700 case AArch64::LDURBBi:
3701 case AArch64::LDRBBui:
3702 return AArch64::LDRBBroX;
3703 case AArch64::LDURSBXi:
3704 case AArch64::LDRSBXui:
3705 return AArch64::LDRSBXroX;
3706 case AArch64::LDURSBWi:
3707 case AArch64::LDRSBWui:
3708 return AArch64::LDRSBWroX;
3709 case AArch64::STURBi:
3710 case AArch64::STRBui:
3711 return AArch64::STRBroX;
3712 case AArch64::STURBBi:
3713 case AArch64::STRBBui:
3714 return AArch64::STRBBroX;
3715 }
3716}
3717
3718 // Given the opcode of a load/store with a [Reg, #Imm] addressing mode (scaled
3719 // or unscaled), return the opcode of the equivalent instruction using the
3720 // scaled-offset [Reg, #Imm] form, and set Scale to its scaling factor.
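// For example, LDURXi maps to LDRXui with Scale = 8, so a byte offset of 24
// corresponds to the scaled immediate 3.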
3721unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale) {
3722 switch (Opcode) {
3723 default:
3724 llvm_unreachable("Address folding not implemented for instruction");
3725
3726 case AArch64::LDURQi:
3727 Scale = 16;
3728 return AArch64::LDRQui;
3729 case AArch64::STURQi:
3730 Scale = 16;
3731 return AArch64::STRQui;
3732 case AArch64::LDURDi:
3733 Scale = 8;
3734 return AArch64::LDRDui;
3735 case AArch64::STURDi:
3736 Scale = 8;
3737 return AArch64::STRDui;
3738 case AArch64::LDURXi:
3739 Scale = 8;
3740 return AArch64::LDRXui;
3741 case AArch64::STURXi:
3742 Scale = 8;
3743 return AArch64::STRXui;
3744 case AArch64::LDURWi:
3745 Scale = 4;
3746 return AArch64::LDRWui;
3747 case AArch64::LDURSWi:
3748 Scale = 4;
3749 return AArch64::LDRSWui;
3750 case AArch64::STURWi:
3751 Scale = 4;
3752 return AArch64::STRWui;
3753 case AArch64::LDURHi:
3754 Scale = 2;
3755 return AArch64::LDRHui;
3756 case AArch64::STURHi:
3757 Scale = 2;
3758 return AArch64::STRHui;
3759 case AArch64::LDURHHi:
3760 Scale = 2;
3761 return AArch64::LDRHHui;
3762 case AArch64::STURHHi:
3763 Scale = 2;
3764 return AArch64::STRHHui;
3765 case AArch64::LDURSHXi:
3766 Scale = 2;
3767 return AArch64::LDRSHXui;
3768 case AArch64::LDURSHWi:
3769 Scale = 2;
3770 return AArch64::LDRSHWui;
3771 case AArch64::LDURBi:
3772 Scale = 1;
3773 return AArch64::LDRBui;
3774 case AArch64::LDURBBi:
3775 Scale = 1;
3776 return AArch64::LDRBBui;
3777 case AArch64::LDURSBXi:
3778 Scale = 1;
3779 return AArch64::LDRSBXui;
3780 case AArch64::LDURSBWi:
3781 Scale = 1;
3782 return AArch64::LDRSBWui;
3783 case AArch64::STURBi:
3784 Scale = 1;
3785 return AArch64::STRBui;
3786 case AArch64::STURBBi:
3787 Scale = 1;
3788 return AArch64::STRBBui;
3789 case AArch64::LDRQui:
3790 case AArch64::STRQui:
3791 Scale = 16;
3792 return Opcode;
3793 case AArch64::LDRDui:
3794 case AArch64::STRDui:
3795 case AArch64::LDRXui:
3796 case AArch64::STRXui:
3797 Scale = 8;
3798 return Opcode;
3799 case AArch64::LDRWui:
3800 case AArch64::LDRSWui:
3801 case AArch64::STRWui:
3802 Scale = 4;
3803 return Opcode;
3804 case AArch64::LDRHui:
3805 case AArch64::STRHui:
3806 case AArch64::LDRHHui:
3807 case AArch64::STRHHui:
3808 case AArch64::LDRSHXui:
3809 case AArch64::LDRSHWui:
3810 Scale = 2;
3811 return Opcode;
3812 case AArch64::LDRBui:
3813 case AArch64::LDRBBui:
3814 case AArch64::LDRSBXui:
3815 case AArch64::LDRSBWui:
3816 case AArch64::STRBui:
3817 case AArch64::STRBBui:
3818 Scale = 1;
3819 return Opcode;
3820 }
3821}
3822
3823 // Given the opcode of a load/store with a [Reg, #Imm] addressing mode (scaled
3824 // or unscaled), return the opcode of the equivalent instruction using the
3825 // unscaled-offset [Reg, #Imm] form.
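// For example, LDRXui maps to LDURXi, whose immediate is an unscaled signed
// 9-bit byte offset.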
3826unsigned unscaledOffsetOpcode(unsigned Opcode) {
3827 switch (Opcode) {
3828 default:
3829 llvm_unreachable("Address folding not implemented for instruction");
3830
3831 case AArch64::LDURQi:
3832 case AArch64::STURQi:
3833 case AArch64::LDURDi:
3834 case AArch64::STURDi:
3835 case AArch64::LDURXi:
3836 case AArch64::STURXi:
3837 case AArch64::LDURWi:
3838 case AArch64::LDURSWi:
3839 case AArch64::STURWi:
3840 case AArch64::LDURHi:
3841 case AArch64::STURHi:
3842 case AArch64::LDURHHi:
3843 case AArch64::STURHHi:
3844 case AArch64::LDURSHXi:
3845 case AArch64::LDURSHWi:
3846 case AArch64::LDURBi:
3847 case AArch64::STURBi:
3848 case AArch64::LDURBBi:
3849 case AArch64::STURBBi:
3850 case AArch64::LDURSBWi:
3851 case AArch64::LDURSBXi:
3852 return Opcode;
3853 case AArch64::LDRQui:
3854 return AArch64::LDURQi;
3855 case AArch64::STRQui:
3856 return AArch64::STURQi;
3857 case AArch64::LDRDui:
3858 return AArch64::LDURDi;
3859 case AArch64::STRDui:
3860 return AArch64::STURDi;
3861 case AArch64::LDRXui:
3862 return AArch64::LDURXi;
3863 case AArch64::STRXui:
3864 return AArch64::STURXi;
3865 case AArch64::LDRWui:
3866 return AArch64::LDURWi;
3867 case AArch64::LDRSWui:
3868 return AArch64::LDURSWi;
3869 case AArch64::STRWui:
3870 return AArch64::STURWi;
3871 case AArch64::LDRHui:
3872 return AArch64::LDURHi;
3873 case AArch64::STRHui:
3874 return AArch64::STURHi;
3875 case AArch64::LDRHHui:
3876 return AArch64::LDURHHi;
3877 case AArch64::STRHHui:
3878 return AArch64::STURHHi;
3879 case AArch64::LDRSHXui:
3880 return AArch64::LDURSHXi;
3881 case AArch64::LDRSHWui:
3882 return AArch64::LDURSHWi;
3883 case AArch64::LDRBBui:
3884 return AArch64::LDURBBi;
3885 case AArch64::LDRBui:
3886 return AArch64::LDURBi;
3887 case AArch64::STRBBui:
3888 return AArch64::STURBBi;
3889 case AArch64::STRBui:
3890 return AArch64::STURBi;
3891 case AArch64::LDRSBWui:
3892 return AArch64::LDURSBWi;
3893 case AArch64::LDRSBXui:
3894 return AArch64::LDURSBXi;
3895 }
3896}
3897
3898// Given the opcode of a memory load/store instruction, return the opcode of an
3899// instruction performing the same operation, but using
3900// the [Reg, Reg, {s,u}xtw #N] addressing mode with sign-/zero-extend of the
3901// offset register.
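// For example, LDRXroX (64-bit Xm offset) maps to LDRXroW, which takes a
// 32-bit Wm offset extended with UXTW or SXTW.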
3902static unsigned offsetExtendOpcode(unsigned Opcode) {
3903 switch (Opcode) {
3904 default:
3905 llvm_unreachable("Address folding not implemented for instruction");
3906
3907 case AArch64::LDRQroX:
3908 case AArch64::LDURQi:
3909 case AArch64::LDRQui:
3910 return AArch64::LDRQroW;
3911 case AArch64::STRQroX:
3912 case AArch64::STURQi:
3913 case AArch64::STRQui:
3914 return AArch64::STRQroW;
3915 case AArch64::LDRDroX:
3916 case AArch64::LDURDi:
3917 case AArch64::LDRDui:
3918 return AArch64::LDRDroW;
3919 case AArch64::STRDroX:
3920 case AArch64::STURDi:
3921 case AArch64::STRDui:
3922 return AArch64::STRDroW;
3923 case AArch64::LDRXroX:
3924 case AArch64::LDURXi:
3925 case AArch64::LDRXui:
3926 return AArch64::LDRXroW;
3927 case AArch64::STRXroX:
3928 case AArch64::STURXi:
3929 case AArch64::STRXui:
3930 return AArch64::STRXroW;
3931 case AArch64::LDRWroX:
3932 case AArch64::LDURWi:
3933 case AArch64::LDRWui:
3934 return AArch64::LDRWroW;
3935 case AArch64::LDRSWroX:
3936 case AArch64::LDURSWi:
3937 case AArch64::LDRSWui:
3938 return AArch64::LDRSWroW;
3939 case AArch64::STRWroX:
3940 case AArch64::STURWi:
3941 case AArch64::STRWui:
3942 return AArch64::STRWroW;
3943 case AArch64::LDRHroX:
3944 case AArch64::LDURHi:
3945 case AArch64::LDRHui:
3946 return AArch64::LDRHroW;
3947 case AArch64::STRHroX:
3948 case AArch64::STURHi:
3949 case AArch64::STRHui:
3950 return AArch64::STRHroW;
3951 case AArch64::LDRHHroX:
3952 case AArch64::LDURHHi:
3953 case AArch64::LDRHHui:
3954 return AArch64::LDRHHroW;
3955 case AArch64::STRHHroX:
3956 case AArch64::STURHHi:
3957 case AArch64::STRHHui:
3958 return AArch64::STRHHroW;
3959 case AArch64::LDRSHXroX:
3960 case AArch64::LDURSHXi:
3961 case AArch64::LDRSHXui:
3962 return AArch64::LDRSHXroW;
3963 case AArch64::LDRSHWroX:
3964 case AArch64::LDURSHWi:
3965 case AArch64::LDRSHWui:
3966 return AArch64::LDRSHWroW;
3967 case AArch64::LDRBroX:
3968 case AArch64::LDURBi:
3969 case AArch64::LDRBui:
3970 return AArch64::LDRBroW;
3971 case AArch64::LDRBBroX:
3972 case AArch64::LDURBBi:
3973 case AArch64::LDRBBui:
3974 return AArch64::LDRBBroW;
3975 case AArch64::LDRSBXroX:
3976 case AArch64::LDURSBXi:
3977 case AArch64::LDRSBXui:
3978 return AArch64::LDRSBXroW;
3979 case AArch64::LDRSBWroX:
3980 case AArch64::LDURSBWi:
3981 case AArch64::LDRSBWui:
3982 return AArch64::LDRSBWroW;
3983 case AArch64::STRBroX:
3984 case AArch64::STURBi:
3985 case AArch64::STRBui:
3986 return AArch64::STRBroW;
3987 case AArch64::STRBBroX:
3988 case AArch64::STURBBi:
3989 case AArch64::STRBBui:
3990 return AArch64::STRBBroW;
3991 }
3992}
3993
3994 MachineInstr *AArch64InstrInfo::emitLdStWithAddr(MachineInstr &MemI,
3995 const ExtAddrMode &AM) const {
3996
3997 const DebugLoc &DL = MemI.getDebugLoc();
3998 MachineBasicBlock &MBB = *MemI.getParent();
3999 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4000
4001 if (AM.Form == ExtAddrMode::Formula::Basic) {
4002 if (AM.ScaledReg) {
4003 // The new instruction will be in the form `ldr Rt, [Xn, Xm, lsl #imm]`.
4004 unsigned Opcode = regOffsetOpcode(MemI.getOpcode());
4005 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
4006 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
4007 .addReg(MemI.getOperand(0).getReg(),
4008 MemI.mayLoad() ? RegState::Define : 0)
4009 .addReg(AM.BaseReg)
4010 .addReg(AM.ScaledReg)
4011 .addImm(0)
4012 .addImm(AM.Scale > 1)
4013 .setMemRefs(MemI.memoperands())
4014 .setMIFlags(MemI.getFlags());
4015 return B.getInstr();
4016 }
4017
4018 assert(AM.ScaledReg == 0 && AM.Scale == 0 &&
4019 "Addressing mode not supported for folding");
4020
4021 // The new instruction will be in the form `ld[u]r Rt, [Xn, #imm]`.
4022 unsigned Scale = 1;
4023 unsigned Opcode = MemI.getOpcode();
4024 if (isInt<9>(AM.Displacement))
4025 Opcode = unscaledOffsetOpcode(Opcode);
4026 else
4027 Opcode = scaledOffsetOpcode(Opcode, Scale);
4028
4029 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
4030 .addReg(MemI.getOperand(0).getReg(),
4031 MemI.mayLoad() ? RegState::Define : 0)
4032 .addReg(AM.BaseReg)
4033 .addImm(AM.Displacement / Scale)
4034 .setMemRefs(MemI.memoperands())
4035 .setMIFlags(MemI.getFlags());
4036 return B.getInstr();
4037 }
4038
4039 if (AM.Form == ExtAddrMode::Formula::SExtScaledReg ||
4040 AM.Form == ExtAddrMode::Formula::ZExtScaledReg) {
4041 // The new instruction will be in the form `ldr Rt, [Xn, Wm, {s,u}xtw #N]`.
4042 assert(AM.ScaledReg && !AM.Displacement &&
4043 "Address offset can be a register or an immediate, but not both");
4044 unsigned Opcode = offsetExtendOpcode(MemI.getOpcode());
4045 MRI.constrainRegClass(AM.BaseReg, &AArch64::GPR64spRegClass);
4046 // Make sure the offset register is in the correct register class.
4047 Register OffsetReg = AM.ScaledReg;
4048 const TargetRegisterClass *RC = MRI.getRegClass(OffsetReg);
4049 if (RC->hasSuperClassEq(&AArch64::GPR64RegClass)) {
4050 OffsetReg = MRI.createVirtualRegister(&AArch64::GPR32RegClass);
4051 BuildMI(MBB, MemI, DL, get(TargetOpcode::COPY), OffsetReg)
4052 .addReg(AM.ScaledReg, 0, AArch64::sub_32);
4053 }
4054 auto B = BuildMI(MBB, MemI, DL, get(Opcode))
4055 .addReg(MemI.getOperand(0).getReg(),
4056 MemI.mayLoad() ? RegState::Define : 0)
4057 .addReg(AM.BaseReg)
4058 .addReg(OffsetReg)
4059 .addImm(AM.Form == ExtAddrMode::Formula::SExtScaledReg)
4060 .addImm(AM.Scale != 1)
4061 .setMemRefs(MemI.memoperands())
4062 .setMIFlags(MemI.getFlags());
4063
4064 return B.getInstr();
4065 }
4066
4067 llvm_unreachable(
4068 "Function must not be called with an addressing mode it can't handle");
4069}
4070
4071 /// Return true if the opcode is a post-index ld/st instruction, which really
4072 /// accesses memory at base+0 and updates the base register after the access.
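/// For example, "ldr x0, [x1], #8" loads from [x1] and only afterwards adds 8
/// to x1.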
4073static bool isPostIndexLdStOpcode(unsigned Opcode) {
4074 switch (Opcode) {
4075 default:
4076 return false;
4077 case AArch64::LD1Fourv16b_POST:
4078 case AArch64::LD1Fourv1d_POST:
4079 case AArch64::LD1Fourv2d_POST:
4080 case AArch64::LD1Fourv2s_POST:
4081 case AArch64::LD1Fourv4h_POST:
4082 case AArch64::LD1Fourv4s_POST:
4083 case AArch64::LD1Fourv8b_POST:
4084 case AArch64::LD1Fourv8h_POST:
4085 case AArch64::LD1Onev16b_POST:
4086 case AArch64::LD1Onev1d_POST:
4087 case AArch64::LD1Onev2d_POST:
4088 case AArch64::LD1Onev2s_POST:
4089 case AArch64::LD1Onev4h_POST:
4090 case AArch64::LD1Onev4s_POST:
4091 case AArch64::LD1Onev8b_POST:
4092 case AArch64::LD1Onev8h_POST:
4093 case AArch64::LD1Rv16b_POST:
4094 case AArch64::LD1Rv1d_POST:
4095 case AArch64::LD1Rv2d_POST:
4096 case AArch64::LD1Rv2s_POST:
4097 case AArch64::LD1Rv4h_POST:
4098 case AArch64::LD1Rv4s_POST:
4099 case AArch64::LD1Rv8b_POST:
4100 case AArch64::LD1Rv8h_POST:
4101 case AArch64::LD1Threev16b_POST:
4102 case AArch64::LD1Threev1d_POST:
4103 case AArch64::LD1Threev2d_POST:
4104 case AArch64::LD1Threev2s_POST:
4105 case AArch64::LD1Threev4h_POST:
4106 case AArch64::LD1Threev4s_POST:
4107 case AArch64::LD1Threev8b_POST:
4108 case AArch64::LD1Threev8h_POST:
4109 case AArch64::LD1Twov16b_POST:
4110 case AArch64::LD1Twov1d_POST:
4111 case AArch64::LD1Twov2d_POST:
4112 case AArch64::LD1Twov2s_POST:
4113 case AArch64::LD1Twov4h_POST:
4114 case AArch64::LD1Twov4s_POST:
4115 case AArch64::LD1Twov8b_POST:
4116 case AArch64::LD1Twov8h_POST:
4117 case AArch64::LD1i16_POST:
4118 case AArch64::LD1i32_POST:
4119 case AArch64::LD1i64_POST:
4120 case AArch64::LD1i8_POST:
4121 case AArch64::LD2Rv16b_POST:
4122 case AArch64::LD2Rv1d_POST:
4123 case AArch64::LD2Rv2d_POST:
4124 case AArch64::LD2Rv2s_POST:
4125 case AArch64::LD2Rv4h_POST:
4126 case AArch64::LD2Rv4s_POST:
4127 case AArch64::LD2Rv8b_POST:
4128 case AArch64::LD2Rv8h_POST:
4129 case AArch64::LD2Twov16b_POST:
4130 case AArch64::LD2Twov2d_POST:
4131 case AArch64::LD2Twov2s_POST:
4132 case AArch64::LD2Twov4h_POST:
4133 case AArch64::LD2Twov4s_POST:
4134 case AArch64::LD2Twov8b_POST:
4135 case AArch64::LD2Twov8h_POST:
4136 case AArch64::LD2i16_POST:
4137 case AArch64::LD2i32_POST:
4138 case AArch64::LD2i64_POST:
4139 case AArch64::LD2i8_POST:
4140 case AArch64::LD3Rv16b_POST:
4141 case AArch64::LD3Rv1d_POST:
4142 case AArch64::LD3Rv2d_POST:
4143 case AArch64::LD3Rv2s_POST:
4144 case AArch64::LD3Rv4h_POST:
4145 case AArch64::LD3Rv4s_POST:
4146 case AArch64::LD3Rv8b_POST:
4147 case AArch64::LD3Rv8h_POST:
4148 case AArch64::LD3Threev16b_POST:
4149 case AArch64::LD3Threev2d_POST:
4150 case AArch64::LD3Threev2s_POST:
4151 case AArch64::LD3Threev4h_POST:
4152 case AArch64::LD3Threev4s_POST:
4153 case AArch64::LD3Threev8b_POST:
4154 case AArch64::LD3Threev8h_POST:
4155 case AArch64::LD3i16_POST:
4156 case AArch64::LD3i32_POST:
4157 case AArch64::LD3i64_POST:
4158 case AArch64::LD3i8_POST:
4159 case AArch64::LD4Fourv16b_POST:
4160 case AArch64::LD4Fourv2d_POST:
4161 case AArch64::LD4Fourv2s_POST:
4162 case AArch64::LD4Fourv4h_POST:
4163 case AArch64::LD4Fourv4s_POST:
4164 case AArch64::LD4Fourv8b_POST:
4165 case AArch64::LD4Fourv8h_POST:
4166 case AArch64::LD4Rv16b_POST:
4167 case AArch64::LD4Rv1d_POST:
4168 case AArch64::LD4Rv2d_POST:
4169 case AArch64::LD4Rv2s_POST:
4170 case AArch64::LD4Rv4h_POST:
4171 case AArch64::LD4Rv4s_POST:
4172 case AArch64::LD4Rv8b_POST:
4173 case AArch64::LD4Rv8h_POST:
4174 case AArch64::LD4i16_POST:
4175 case AArch64::LD4i32_POST:
4176 case AArch64::LD4i64_POST:
4177 case AArch64::LD4i8_POST:
4178 case AArch64::LDAPRWpost:
4179 case AArch64::LDAPRXpost:
4180 case AArch64::LDIAPPWpost:
4181 case AArch64::LDIAPPXpost:
4182 case AArch64::LDPDpost:
4183 case AArch64::LDPQpost:
4184 case AArch64::LDPSWpost:
4185 case AArch64::LDPSpost:
4186 case AArch64::LDPWpost:
4187 case AArch64::LDPXpost:
4188 case AArch64::LDRBBpost:
4189 case AArch64::LDRBpost:
4190 case AArch64::LDRDpost:
4191 case AArch64::LDRHHpost:
4192 case AArch64::LDRHpost:
4193 case AArch64::LDRQpost:
4194 case AArch64::LDRSBWpost:
4195 case AArch64::LDRSBXpost:
4196 case AArch64::LDRSHWpost:
4197 case AArch64::LDRSHXpost:
4198 case AArch64::LDRSWpost:
4199 case AArch64::LDRSpost:
4200 case AArch64::LDRWpost:
4201 case AArch64::LDRXpost:
4202 case AArch64::ST1Fourv16b_POST:
4203 case AArch64::ST1Fourv1d_POST:
4204 case AArch64::ST1Fourv2d_POST:
4205 case AArch64::ST1Fourv2s_POST:
4206 case AArch64::ST1Fourv4h_POST:
4207 case AArch64::ST1Fourv4s_POST:
4208 case AArch64::ST1Fourv8b_POST:
4209 case AArch64::ST1Fourv8h_POST:
4210 case AArch64::ST1Onev16b_POST:
4211 case AArch64::ST1Onev1d_POST:
4212 case AArch64::ST1Onev2d_POST:
4213 case AArch64::ST1Onev2s_POST:
4214 case AArch64::ST1Onev4h_POST:
4215 case AArch64::ST1Onev4s_POST:
4216 case AArch64::ST1Onev8b_POST:
4217 case AArch64::ST1Onev8h_POST:
4218 case AArch64::ST1Threev16b_POST:
4219 case AArch64::ST1Threev1d_POST:
4220 case AArch64::ST1Threev2d_POST:
4221 case AArch64::ST1Threev2s_POST:
4222 case AArch64::ST1Threev4h_POST:
4223 case AArch64::ST1Threev4s_POST:
4224 case AArch64::ST1Threev8b_POST:
4225 case AArch64::ST1Threev8h_POST:
4226 case AArch64::ST1Twov16b_POST:
4227 case AArch64::ST1Twov1d_POST:
4228 case AArch64::ST1Twov2d_POST:
4229 case AArch64::ST1Twov2s_POST:
4230 case AArch64::ST1Twov4h_POST:
4231 case AArch64::ST1Twov4s_POST:
4232 case AArch64::ST1Twov8b_POST:
4233 case AArch64::ST1Twov8h_POST:
4234 case AArch64::ST1i16_POST:
4235 case AArch64::ST1i32_POST:
4236 case AArch64::ST1i64_POST:
4237 case AArch64::ST1i8_POST:
4238 case AArch64::ST2GPostIndex:
4239 case AArch64::ST2Twov16b_POST:
4240 case AArch64::ST2Twov2d_POST:
4241 case AArch64::ST2Twov2s_POST:
4242 case AArch64::ST2Twov4h_POST:
4243 case AArch64::ST2Twov4s_POST:
4244 case AArch64::ST2Twov8b_POST:
4245 case AArch64::ST2Twov8h_POST:
4246 case AArch64::ST2i16_POST:
4247 case AArch64::ST2i32_POST:
4248 case AArch64::ST2i64_POST:
4249 case AArch64::ST2i8_POST:
4250 case AArch64::ST3Threev16b_POST:
4251 case AArch64::ST3Threev2d_POST:
4252 case AArch64::ST3Threev2s_POST:
4253 case AArch64::ST3Threev4h_POST:
4254 case AArch64::ST3Threev4s_POST:
4255 case AArch64::ST3Threev8b_POST:
4256 case AArch64::ST3Threev8h_POST:
4257 case AArch64::ST3i16_POST:
4258 case AArch64::ST3i32_POST:
4259 case AArch64::ST3i64_POST:
4260 case AArch64::ST3i8_POST:
4261 case AArch64::ST4Fourv16b_POST:
4262 case AArch64::ST4Fourv2d_POST:
4263 case AArch64::ST4Fourv2s_POST:
4264 case AArch64::ST4Fourv4h_POST:
4265 case AArch64::ST4Fourv4s_POST:
4266 case AArch64::ST4Fourv8b_POST:
4267 case AArch64::ST4Fourv8h_POST:
4268 case AArch64::ST4i16_POST:
4269 case AArch64::ST4i32_POST:
4270 case AArch64::ST4i64_POST:
4271 case AArch64::ST4i8_POST:
4272 case AArch64::STGPostIndex:
4273 case AArch64::STGPpost:
4274 case AArch64::STPDpost:
4275 case AArch64::STPQpost:
4276 case AArch64::STPSpost:
4277 case AArch64::STPWpost:
4278 case AArch64::STPXpost:
4279 case AArch64::STRBBpost:
4280 case AArch64::STRBpost:
4281 case AArch64::STRDpost:
4282 case AArch64::STRHHpost:
4283 case AArch64::STRHpost:
4284 case AArch64::STRQpost:
4285 case AArch64::STRSpost:
4286 case AArch64::STRWpost:
4287 case AArch64::STRXpost:
4288 case AArch64::STZ2GPostIndex:
4289 case AArch64::STZGPostIndex:
4290 return true;
4291 }
4292}
4293
4294 bool AArch64InstrInfo::getMemOperandWithOffsetWidth(
4295 const MachineInstr &LdSt, const MachineOperand *&BaseOp, int64_t &Offset,
4296 bool &OffsetIsScalable, TypeSize &Width,
4297 const TargetRegisterInfo *TRI) const {
4298 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
4299 // Handle only loads/stores with base register followed by immediate offset.
4300 if (LdSt.getNumExplicitOperands() == 3) {
4301 // Non-paired instruction (e.g., ldr x1, [x0, #8]).
4302 if ((!LdSt.getOperand(1).isReg() && !LdSt.getOperand(1).isFI()) ||
4303 !LdSt.getOperand(2).isImm())
4304 return false;
4305 } else if (LdSt.getNumExplicitOperands() == 4) {
4306 // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
4307 if (!LdSt.getOperand(1).isReg() ||
4308 (!LdSt.getOperand(2).isReg() && !LdSt.getOperand(2).isFI()) ||
4309 !LdSt.getOperand(3).isImm())
4310 return false;
4311 } else
4312 return false;
4313
4314 // Get the scaling factor for the instruction and set the width for the
4315 // instruction.
4316 TypeSize Scale(0U, false);
4317 int64_t Dummy1, Dummy2;
4318
4319 // If this returns false, then it's an instruction we don't want to handle.
4320 if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
4321 return false;
4322
4323 // Compute the offset. The offset is the immediate operand multiplied by the
4324 // scaling factor; unscaled instructions have a scaling factor of 1.
4325 // Post-indexed instructions are a special case and have an offset of 0.
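// For example, LDRXui with immediate 3 addresses [BaseReg, #24] (scale 8),
// while LDURXi with immediate 24 addresses the same location (scale 1).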
4326 if (isPostIndexLdStOpcode(LdSt.getOpcode())) {
4327 BaseOp = &LdSt.getOperand(2);
4328 Offset = 0;
4329 } else if (LdSt.getNumExplicitOperands() == 3) {
4330 BaseOp = &LdSt.getOperand(1);
4331 Offset = LdSt.getOperand(2).getImm() * Scale.getKnownMinValue();
4332 } else {
4333 assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
4334 BaseOp = &LdSt.getOperand(2);
4335 Offset = LdSt.getOperand(3).getImm() * Scale.getKnownMinValue();
4336 }
4337 OffsetIsScalable = Scale.isScalable();
4338
4339 return BaseOp->isReg() || BaseOp->isFI();
4340}
4341
4344 assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
4345 MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
4346 assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
4347 return OfsOp;
4348}
4349
4350bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, TypeSize &Scale,
4351 TypeSize &Width, int64_t &MinOffset,
4352 int64_t &MaxOffset) {
4353 switch (Opcode) {
4354 // Not a memory operation or something we want to handle.
4355 default:
4356 Scale = TypeSize::getFixed(0);
4357 Width = TypeSize::getFixed(0);
4358 MinOffset = MaxOffset = 0;
4359 return false;
4360 // LDR / STR
4361 case AArch64::LDRQui:
4362 case AArch64::STRQui:
4363 Scale = TypeSize::getFixed(16);
4364 Width = TypeSize::getFixed(16);
4365 MinOffset = 0;
4366 MaxOffset = 4095;
4367 break;
4368 case AArch64::LDRXui:
4369 case AArch64::LDRDui:
4370 case AArch64::STRXui:
4371 case AArch64::STRDui:
4372 case AArch64::PRFMui:
4373 Scale = TypeSize::getFixed(8);
4374 Width = TypeSize::getFixed(8);
4375 MinOffset = 0;
4376 MaxOffset = 4095;
4377 break;
4378 case AArch64::LDRWui:
4379 case AArch64::LDRSui:
4380 case AArch64::LDRSWui:
4381 case AArch64::STRWui:
4382 case AArch64::STRSui:
4383 Scale = TypeSize::getFixed(4);
4384 Width = TypeSize::getFixed(4);
4385 MinOffset = 0;
4386 MaxOffset = 4095;
4387 break;
4388 case AArch64::LDRHui:
4389 case AArch64::LDRHHui:
4390 case AArch64::LDRSHWui:
4391 case AArch64::LDRSHXui:
4392 case AArch64::STRHui:
4393 case AArch64::STRHHui:
4394 Scale = TypeSize::getFixed(2);
4395 Width = TypeSize::getFixed(2);
4396 MinOffset = 0;
4397 MaxOffset = 4095;
4398 break;
4399 case AArch64::LDRBui:
4400 case AArch64::LDRBBui:
4401 case AArch64::LDRSBWui:
4402 case AArch64::LDRSBXui:
4403 case AArch64::STRBui:
4404 case AArch64::STRBBui:
4405 Scale = TypeSize::getFixed(1);
4406 Width = TypeSize::getFixed(1);
4407 MinOffset = 0;
4408 MaxOffset = 4095;
4409 break;
4410 // post/pre inc
4411 case AArch64::STRQpre:
4412 case AArch64::LDRQpost:
4413 Scale = TypeSize::getFixed(1);
4414 Width = TypeSize::getFixed(16);
4415 MinOffset = -256;
4416 MaxOffset = 255;
4417 break;
4418 case AArch64::LDRDpost:
4419 case AArch64::LDRDpre:
4420 case AArch64::LDRXpost:
4421 case AArch64::LDRXpre:
4422 case AArch64::STRDpost:
4423 case AArch64::STRDpre:
4424 case AArch64::STRXpost:
4425 case AArch64::STRXpre:
4426 Scale = TypeSize::getFixed(1);
4427 Width = TypeSize::getFixed(8);
4428 MinOffset = -256;
4429 MaxOffset = 255;
4430 break;
4431 case AArch64::STRWpost:
4432 case AArch64::STRWpre:
4433 case AArch64::LDRWpost:
4434 case AArch64::LDRWpre:
4435 case AArch64::STRSpost:
4436 case AArch64::STRSpre:
4437 case AArch64::LDRSpost:
4438 case AArch64::LDRSpre:
4439 Scale = TypeSize::getFixed(1);
4440 Width = TypeSize::getFixed(4);
4441 MinOffset = -256;
4442 MaxOffset = 255;
4443 break;
4444 case AArch64::LDRHpost:
4445 case AArch64::LDRHpre:
4446 case AArch64::STRHpost:
4447 case AArch64::STRHpre:
4448 case AArch64::LDRHHpost:
4449 case AArch64::LDRHHpre:
4450 case AArch64::STRHHpost:
4451 case AArch64::STRHHpre:
4452 Scale = TypeSize::getFixed(1);
4453 Width = TypeSize::getFixed(2);
4454 MinOffset = -256;
4455 MaxOffset = 255;
4456 break;
4457 case AArch64::LDRBpost:
4458 case AArch64::LDRBpre:
4459 case AArch64::STRBpost:
4460 case AArch64::STRBpre:
4461 case AArch64::LDRBBpost:
4462 case AArch64::LDRBBpre:
4463 case AArch64::STRBBpost:
4464 case AArch64::STRBBpre:
4465 Scale = TypeSize::getFixed(1);
4466 Width = TypeSize::getFixed(1);
4467 MinOffset = -256;
4468 MaxOffset = 255;
4469 break;
4470 // Unscaled
4471 case AArch64::LDURQi:
4472 case AArch64::STURQi:
4473 Scale = TypeSize::getFixed(1);
4474 Width = TypeSize::getFixed(16);
4475 MinOffset = -256;
4476 MaxOffset = 255;
4477 break;
4478 case AArch64::LDURXi:
4479 case AArch64::LDURDi:
4480 case AArch64::LDAPURXi:
4481 case AArch64::STURXi:
4482 case AArch64::STURDi:
4483 case AArch64::STLURXi:
4484 case AArch64::PRFUMi:
4485 Scale = TypeSize::getFixed(1);
4486 Width = TypeSize::getFixed(8);
4487 MinOffset = -256;
4488 MaxOffset = 255;
4489 break;
4490 case AArch64::LDURWi:
4491 case AArch64::LDURSi:
4492 case AArch64::LDURSWi:
4493 case AArch64::LDAPURi:
4494 case AArch64::LDAPURSWi:
4495 case AArch64::STURWi:
4496 case AArch64::STURSi:
4497 case AArch64::STLURWi:
4498 Scale = TypeSize::getFixed(1);
4499 Width = TypeSize::getFixed(4);
4500 MinOffset = -256;
4501 MaxOffset = 255;
4502 break;
4503 case AArch64::LDURHi:
4504 case AArch64::LDURHHi:
4505 case AArch64::LDURSHXi:
4506 case AArch64::LDURSHWi:
4507 case AArch64::LDAPURHi:
4508 case AArch64::LDAPURSHWi:
4509 case AArch64::LDAPURSHXi:
4510 case AArch64::STURHi:
4511 case AArch64::STURHHi:
4512 case AArch64::STLURHi:
4513 Scale = TypeSize::getFixed(1);
4514 Width = TypeSize::getFixed(2);
4515 MinOffset = -256;
4516 MaxOffset = 255;
4517 break;
4518 case AArch64::LDURBi:
4519 case AArch64::LDURBBi:
4520 case AArch64::LDURSBXi:
4521 case AArch64::LDURSBWi:
4522 case AArch64::LDAPURBi:
4523 case AArch64::LDAPURSBWi:
4524 case AArch64::LDAPURSBXi:
4525 case AArch64::STURBi:
4526 case AArch64::STURBBi:
4527 case AArch64::STLURBi:
4528 Scale = TypeSize::getFixed(1);
4529 Width = TypeSize::getFixed(1);
4530 MinOffset = -256;
4531 MaxOffset = 255;
4532 break;
4533 // LDP / STP (including pre/post inc)
4534 case AArch64::LDPQi:
4535 case AArch64::LDNPQi:
4536 case AArch64::STPQi:
4537 case AArch64::STNPQi:
4538 case AArch64::LDPQpost:
4539 case AArch64::LDPQpre:
4540 case AArch64::STPQpost:
4541 case AArch64::STPQpre:
4542 Scale = TypeSize::getFixed(16);
4543 Width = TypeSize::getFixed(16 * 2);
4544 MinOffset = -64;
4545 MaxOffset = 63;
4546 break;
4547 case AArch64::LDPXi:
4548 case AArch64::LDPDi:
4549 case AArch64::LDNPXi:
4550 case AArch64::LDNPDi:
4551 case AArch64::STPXi:
4552 case AArch64::STPDi:
4553 case AArch64::STNPXi:
4554 case AArch64::STNPDi:
4555 case AArch64::LDPDpost:
4556 case AArch64::LDPDpre:
4557 case AArch64::LDPXpost:
4558 case AArch64::LDPXpre:
4559 case AArch64::STPDpost:
4560 case AArch64::STPDpre:
4561 case AArch64::STPXpost:
4562 case AArch64::STPXpre:
4563 Scale = TypeSize::getFixed(8);
4564 Width = TypeSize::getFixed(8 * 2);
4565 MinOffset = -64;
4566 MaxOffset = 63;
4567 break;
4568 case AArch64::LDPWi:
4569 case AArch64::LDPSi:
4570 case AArch64::LDNPWi:
4571 case AArch64::LDNPSi:
4572 case AArch64::STPWi:
4573 case AArch64::STPSi:
4574 case AArch64::STNPWi:
4575 case AArch64::STNPSi:
4576 case AArch64::LDPSpost:
4577 case AArch64::LDPSpre:
4578 case AArch64::LDPWpost:
4579 case AArch64::LDPWpre:
4580 case AArch64::STPSpost:
4581 case AArch64::STPSpre:
4582 case AArch64::STPWpost:
4583 case AArch64::STPWpre:
4584 Scale = TypeSize::getFixed(4);
4585 Width = TypeSize::getFixed(4 * 2);
4586 MinOffset = -64;
4587 MaxOffset = 63;
4588 break;
4589 case AArch64::StoreSwiftAsyncContext:
4590 // Store is an STRXui, but there might be an ADDXri in the expansion too.
4591 Scale = TypeSize::getFixed(1);
4592 Width = TypeSize::getFixed(8);
4593 MinOffset = 0;
4594 MaxOffset = 4095;
4595 break;
4596 case AArch64::ADDG:
4597 Scale = TypeSize::getFixed(16);
4598 Width = TypeSize::getFixed(0);
4599 MinOffset = 0;
4600 MaxOffset = 63;
4601 break;
4602 case AArch64::TAGPstack:
4603 Scale = TypeSize::getFixed(16);
4604 Width = TypeSize::getFixed(0);
4605 // TAGP with a negative offset turns into SUBP, which has a maximum offset
4606 // of 63 (not 64!).
4607 MinOffset = -63;
4608 MaxOffset = 63;
4609 break;
4610 case AArch64::LDG:
4611 case AArch64::STGi:
4612 case AArch64::STGPreIndex:
4613 case AArch64::STGPostIndex:
4614 case AArch64::STZGi:
4615 case AArch64::STZGPreIndex:
4616 case AArch64::STZGPostIndex:
4617 Scale = TypeSize::getFixed(16);
4618 Width = TypeSize::getFixed(16);
4619 MinOffset = -256;
4620 MaxOffset = 255;
4621 break;
4622 // SVE
4623 case AArch64::STR_ZZZZXI:
4624 case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
4625 case AArch64::LDR_ZZZZXI:
4626 case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
4627 Scale = TypeSize::getScalable(16);
4628 Width = TypeSize::getScalable(16 * 4);
4629 MinOffset = -256;
4630 MaxOffset = 252;
4631 break;
4632 case AArch64::STR_ZZZXI:
4633 case AArch64::LDR_ZZZXI:
4634 Scale = TypeSize::getScalable(16);
4635 Width = TypeSize::getScalable(16 * 3);
4636 MinOffset = -256;
4637 MaxOffset = 253;
4638 break;
4639 case AArch64::STR_ZZXI:
4640 case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
4641 case AArch64::LDR_ZZXI:
4642 case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
4643 Scale = TypeSize::getScalable(16);
4644 Width = TypeSize::getScalable(16 * 2);
4645 MinOffset = -256;
4646 MaxOffset = 254;
4647 break;
4648 case AArch64::LDR_PXI:
4649 case AArch64::STR_PXI:
4650 Scale = TypeSize::getScalable(2);
4651 Width = TypeSize::getScalable(2);
4652 MinOffset = -256;
4653 MaxOffset = 255;
4654 break;
4655 case AArch64::LDR_PPXI:
4656 case AArch64::STR_PPXI:
4657 Scale = TypeSize::getScalable(2);
4658 Width = TypeSize::getScalable(2 * 2);
4659 MinOffset = -256;
4660 MaxOffset = 254;
4661 break;
4662 case AArch64::LDR_ZXI:
4663 case AArch64::STR_ZXI:
4664 Scale = TypeSize::getScalable(16);
4665 Width = TypeSize::getScalable(16);
4666 MinOffset = -256;
4667 MaxOffset = 255;
4668 break;
4669 case AArch64::LD1B_IMM:
4670 case AArch64::LD1H_IMM:
4671 case AArch64::LD1W_IMM:
4672 case AArch64::LD1D_IMM:
4673 case AArch64::LDNT1B_ZRI:
4674 case AArch64::LDNT1H_ZRI:
4675 case AArch64::LDNT1W_ZRI:
4676 case AArch64::LDNT1D_ZRI:
4677 case AArch64::ST1B_IMM:
4678 case AArch64::ST1H_IMM:
4679 case AArch64::ST1W_IMM:
4680 case AArch64::ST1D_IMM:
4681 case AArch64::STNT1B_ZRI:
4682 case AArch64::STNT1H_ZRI:
4683 case AArch64::STNT1W_ZRI:
4684 case AArch64::STNT1D_ZRI:
4685 case AArch64::LDNF1B_IMM:
4686 case AArch64::LDNF1H_IMM:
4687 case AArch64::LDNF1W_IMM:
4688 case AArch64::LDNF1D_IMM:
4689 // A full vector's worth of data
4690 // Width = mbytes * elements
4691 Scale = TypeSize::getScalable(16);
4692 Width = TypeSize::getScalable(16);
4693 MinOffset = -8;
4694 MaxOffset = 7;
4695 break;
4696 case AArch64::LD2B_IMM:
4697 case AArch64::LD2H_IMM:
4698 case AArch64::LD2W_IMM:
4699 case AArch64::LD2D_IMM:
4700 case AArch64::ST2B_IMM:
4701 case AArch64::ST2H_IMM:
4702 case AArch64::ST2W_IMM:
4703 case AArch64::ST2D_IMM:
4704 Scale = TypeSize::getScalable(32);
4705 Width = TypeSize::getScalable(16 * 2);
4706 MinOffset = -8;
4707 MaxOffset = 7;
4708 break;
4709 case AArch64::LD3B_IMM:
4710 case AArch64::LD3H_IMM:
4711 case AArch64::LD3W_IMM:
4712 case AArch64::LD3D_IMM:
4713 case AArch64::ST3B_IMM:
4714 case AArch64::ST3H_IMM:
4715 case AArch64::ST3W_IMM:
4716 case AArch64::ST3D_IMM:
4717 Scale = TypeSize::getScalable(48);
4718 Width = TypeSize::getScalable(16 * 3);
4719 MinOffset = -8;
4720 MaxOffset = 7;
4721 break;
4722 case AArch64::LD4B_IMM:
4723 case AArch64::LD4H_IMM:
4724 case AArch64::LD4W_IMM:
4725 case AArch64::LD4D_IMM:
4726 case AArch64::ST4B_IMM:
4727 case AArch64::ST4H_IMM:
4728 case AArch64::ST4W_IMM:
4729 case AArch64::ST4D_IMM:
4730 Scale = TypeSize::getScalable(64);
4731 Width = TypeSize::getScalable(16 * 4);
4732 MinOffset = -8;
4733 MaxOffset = 7;
4734 break;
4735 case AArch64::LD1B_H_IMM:
4736 case AArch64::LD1SB_H_IMM:
4737 case AArch64::LD1H_S_IMM:
4738 case AArch64::LD1SH_S_IMM:
4739 case AArch64::LD1W_D_IMM:
4740 case AArch64::LD1SW_D_IMM:
4741 case AArch64::ST1B_H_IMM:
4742 case AArch64::ST1H_S_IMM:
4743 case AArch64::ST1W_D_IMM:
4744 case AArch64::LDNF1B_H_IMM:
4745 case AArch64::LDNF1SB_H_IMM:
4746 case AArch64::LDNF1H_S_IMM:
4747 case AArch64::LDNF1SH_S_IMM:
4748 case AArch64::LDNF1W_D_IMM:
4749 case AArch64::LDNF1SW_D_IMM:
4750 // A half vector's worth of data
4751 // Width = mbytes * elements
4752 Scale = TypeSize::getScalable(8);
4753 Width = TypeSize::getScalable(8);
4754 MinOffset = -8;
4755 MaxOffset = 7;
4756 break;
4757 case AArch64::LD1B_S_IMM:
4758 case AArch64::LD1SB_S_IMM:
4759 case AArch64::LD1H_D_IMM:
4760 case AArch64::LD1SH_D_IMM:
4761 case AArch64::ST1B_S_IMM:
4762 case AArch64::ST1H_D_IMM:
4763 case AArch64::LDNF1B_S_IMM:
4764 case AArch64::LDNF1SB_S_IMM:
4765 case AArch64::LDNF1H_D_IMM:
4766 case AArch64::LDNF1SH_D_IMM:
4767 // A quarter vector's worth of data
4768 // Width = mbytes * elements
4769 Scale = TypeSize::getScalable(4);
4770 Width = TypeSize::getScalable(4);
4771 MinOffset = -8;
4772 MaxOffset = 7;
4773 break;
4774 case AArch64::LD1B_D_IMM:
4775 case AArch64::LD1SB_D_IMM:
4776 case AArch64::ST1B_D_IMM:
4777 case AArch64::LDNF1B_D_IMM:
4778 case AArch64::LDNF1SB_D_IMM:
4779 // An eighth vector's worth of data
4780 // Width = mbytes * elements
4781 Scale = TypeSize::getScalable(2);
4782 Width = TypeSize::getScalable(2);
4783 MinOffset = -8;
4784 MaxOffset = 7;
4785 break;
4786 case AArch64::ST2Gi:
4787 case AArch64::ST2GPreIndex:
4788 case AArch64::ST2GPostIndex:
4789 case AArch64::STZ2Gi:
4790 case AArch64::STZ2GPreIndex:
4791 case AArch64::STZ2GPostIndex:
4792 Scale = TypeSize::getFixed(16);
4793 Width = TypeSize::getFixed(32);
4794 MinOffset = -256;
4795 MaxOffset = 255;
4796 break;
4797 case AArch64::STGPi:
4798 case AArch64::STGPpost:
4799 case AArch64::STGPpre:
4800 Scale = TypeSize::getFixed(16);
4801 Width = TypeSize::getFixed(16);
4802 MinOffset = -64;
4803 MaxOffset = 63;
4804 break;
4805 case AArch64::LD1RB_IMM:
4806 case AArch64::LD1RB_H_IMM:
4807 case AArch64::LD1RB_S_IMM:
4808 case AArch64::LD1RB_D_IMM:
4809 case AArch64::LD1RSB_H_IMM:
4810 case AArch64::LD1RSB_S_IMM:
4811 case AArch64::LD1RSB_D_IMM:
4812 Scale = TypeSize::getFixed(1);
4813 Width = TypeSize::getFixed(1);
4814 MinOffset = 0;
4815 MaxOffset = 63;
4816 break;
4817 case AArch64::LD1RH_IMM:
4818 case AArch64::LD1RH_S_IMM:
4819 case AArch64::LD1RH_D_IMM:
4820 case AArch64::LD1RSH_S_IMM:
4821 case AArch64::LD1RSH_D_IMM:
4822 Scale = TypeSize::getFixed(2);
4823 Width = TypeSize::getFixed(2);
4824 MinOffset = 0;
4825 MaxOffset = 63;
4826 break;
4827 case AArch64::LD1RW_IMM:
4828 case AArch64::LD1RW_D_IMM:
4829 case AArch64::LD1RSW_IMM:
4830 Scale = TypeSize::getFixed(4);
4831 Width = TypeSize::getFixed(4);
4832 MinOffset = 0;
4833 MaxOffset = 63;
4834 break;
4835 case AArch64::LD1RD_IMM:
4836 Scale = TypeSize::getFixed(8);
4837 Width = TypeSize::getFixed(8);
4838 MinOffset = 0;
4839 MaxOffset = 63;
4840 break;
4841 }
4842
4843 return true;
4844}
4845
4846// Scaling factor for unscaled load or store.
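// For example, getMemScale(AArch64::LDURXi) and getMemScale(AArch64::LDRXui)
// both return 8, the number of bytes accessed.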
4847 int AArch64InstrInfo::getMemScale(unsigned Opc) {
4848 switch (Opc) {
4849 default:
4850 llvm_unreachable("Opcode has unknown scale!");
4851 case AArch64::LDRBBui:
4852 case AArch64::LDURBBi:
4853 case AArch64::LDRSBWui:
4854 case AArch64::LDURSBWi:
4855 case AArch64::STRBBui:
4856 case AArch64::STURBBi:
4857 return 1;
4858 case AArch64::LDRHHui:
4859 case AArch64::LDURHHi:
4860 case AArch64::LDRSHWui:
4861 case AArch64::LDURSHWi:
4862 case AArch64::STRHHui:
4863 case AArch64::STURHHi:
4864 return 2;
4865 case AArch64::LDRSui:
4866 case AArch64::LDURSi:
4867 case AArch64::LDRSpre:
4868 case AArch64::LDRSWui:
4869 case AArch64::LDURSWi:
4870 case AArch64::LDRSWpre:
4871 case AArch64::LDRWpre:
4872 case AArch64::LDRWui:
4873 case AArch64::LDURWi:
4874 case AArch64::STRSui:
4875 case AArch64::STURSi:
4876 case AArch64::STRSpre:
4877 case AArch64::STRWui:
4878 case AArch64::STURWi:
4879 case AArch64::STRWpre:
4880 case AArch64::LDPSi:
4881 case AArch64::LDPSWi:
4882 case AArch64::LDPWi:
4883 case AArch64::STPSi:
4884 case AArch64::STPWi:
4885 return 4;
4886 case AArch64::LDRDui:
4887 case AArch64::LDURDi:
4888 case AArch64::LDRDpre:
4889 case AArch64::LDRXui:
4890 case AArch64::LDURXi:
4891 case AArch64::LDRXpre:
4892 case AArch64::STRDui:
4893 case AArch64::STURDi:
4894 case AArch64::STRDpre:
4895 case AArch64::STRXui:
4896 case AArch64::STURXi:
4897 case AArch64::STRXpre:
4898 case AArch64::LDPDi:
4899 case AArch64::LDPXi:
4900 case AArch64::STPDi:
4901 case AArch64::STPXi:
4902 return 8;
4903 case AArch64::LDRQui:
4904 case AArch64::LDURQi:
4905 case AArch64::STRQui:
4906 case AArch64::STURQi:
4907 case AArch64::STRQpre:
4908 case AArch64::LDPQi:
4909 case AArch64::LDRQpre:
4910 case AArch64::STPQi:
4911 case AArch64::STGi:
4912 case AArch64::STZGi:
4913 case AArch64::ST2Gi:
4914 case AArch64::STZ2Gi:
4915 case AArch64::STGPi:
4916 return 16;
4917 }
4918}
4919
4920 bool AArch64InstrInfo::isPreLd(const MachineInstr &MI) {
4921 switch (MI.getOpcode()) {
4922 default:
4923 return false;
4924 case AArch64::LDRWpre:
4925 case AArch64::LDRXpre:
4926 case AArch64::LDRSWpre:
4927 case AArch64::LDRSpre:
4928 case AArch64::LDRDpre:
4929 case AArch64::LDRQpre:
4930 return true;
4931 }
4932}
4933
4934 bool AArch64InstrInfo::isPreSt(const MachineInstr &MI) {
4935 switch (MI.getOpcode()) {
4936 default:
4937 return false;
4938 case AArch64::STRWpre:
4939 case AArch64::STRXpre:
4940 case AArch64::STRSpre:
4941 case AArch64::STRDpre:
4942 case AArch64::STRQpre:
4943 return true;
4944 }
4945}
4946
4947 bool AArch64InstrInfo::isPreLdSt(const MachineInstr &MI) {
4948 return isPreLd(MI) || isPreSt(MI);
4949}
4950
4951 bool AArch64InstrInfo::isPairedLdSt(const MachineInstr &MI) {
4952 switch (MI.getOpcode()) {
4953 default:
4954 return false;
4955 case AArch64::LDPSi:
4956 case AArch64::LDPSWi:
4957 case AArch64::LDPDi:
4958 case AArch64::LDPQi:
4959 case AArch64::LDPWi:
4960 case AArch64::LDPXi:
4961 case AArch64::STPSi:
4962 case AArch64::STPDi:
4963 case AArch64::STPQi:
4964 case AArch64::STPWi:
4965 case AArch64::STPXi:
4966 case AArch64::STGPi:
4967 return true;
4968 }
4969}
4970
4971 const MachineOperand &AArch64InstrInfo::getLdStBaseOp(const MachineInstr &MI) {
4972 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
4973 unsigned Idx =
4974 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 2
4975 : 1;
4976 return MI.getOperand(Idx);
4977}
4978
4979 const MachineOperand &
4980 AArch64InstrInfo::getLdStOffsetOp(const MachineInstr &MI) {
4981 assert(MI.mayLoadOrStore() && "Load or store instruction expected");
4982 unsigned Idx =
4983 AArch64InstrInfo::isPairedLdSt(MI) || AArch64InstrInfo::isPreLdSt(MI) ? 3
4984 : 2;
4985 return MI.getOperand(Idx);
4986}
4987
4988const MachineOperand &
4989 AArch64InstrInfo::getLdStAmountOp(const MachineInstr &MI) {
4990 switch (MI.getOpcode()) {
4991 default:
4992 llvm_unreachable("Unexpected opcode");
4993 case AArch64::LDRBroX:
4994 case AArch64::LDRBBroX:
4995 case AArch64::LDRSBXroX:
4996 case AArch64::LDRSBWroX:
4997 case AArch64::LDRHroX:
4998 case AArch64::LDRHHroX:
4999 case AArch64::LDRSHXroX:
5000 case AArch64::LDRSHWroX:
5001 case AArch64::LDRWroX:
5002 case AArch64::LDRSroX:
5003 case AArch64::LDRSWroX:
5004 case AArch64::LDRDroX:
5005 case AArch64::LDRXroX:
5006 case AArch64::LDRQroX:
5007 return MI.getOperand(4);
5008 }
5009}
5010
5011 static const TargetRegisterClass *getRegClass(const MachineInstr &MI,
5012 Register Reg) {
5013 if (MI.getParent() == nullptr)
5014 return nullptr;
5015 const MachineFunction *MF = MI.getParent()->getParent();
5016 return MF ? MF->getRegInfo().getRegClassOrNull(Reg) : nullptr;
5017}
5018
5019 bool AArch64InstrInfo::isHForm(const MachineInstr &MI) {
5020 auto IsHFPR = [&](const MachineOperand &Op) {
5021 if (!Op.isReg())
5022 return false;
5023 auto Reg = Op.getReg();
5024 if (Reg.isPhysical())
5025 return AArch64::FPR16RegClass.contains(Reg);
5026 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
5027 return TRC == &AArch64::FPR16RegClass ||
5028 TRC == &AArch64::FPR16_loRegClass;
5029 };
5030 return llvm::any_of(MI.operands(), IsHFPR);
5031}
5032
5033 bool AArch64InstrInfo::isQForm(const MachineInstr &MI) {
5034 auto IsQFPR = [&](const MachineOperand &Op) {
5035 if (!Op.isReg())
5036 return false;
5037 auto Reg = Op.getReg();
5038 if (Reg.isPhysical())
5039 return AArch64::FPR128RegClass.contains(Reg);
5040 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
5041 return TRC == &AArch64::FPR128RegClass ||
5042 TRC == &AArch64::FPR128_loRegClass;
5043 };
5044 return llvm::any_of(MI.operands(), IsQFPR);
5045}
5046
5047 bool AArch64InstrInfo::hasBTISemantics(const MachineInstr &MI) {
5048 switch (MI.getOpcode()) {
5049 case AArch64::BRK:
5050 case AArch64::HLT:
5051 case AArch64::PACIASP:
5052 case AArch64::PACIBSP:
5053 // Implicit BTI behavior.
5054 return true;
5055 case AArch64::PAUTH_PROLOGUE:
5056 // PAUTH_PROLOGUE expands to PACI(A|B)SP.
5057 return true;
5058 case AArch64::HINT: {
5059 unsigned Imm = MI.getOperand(0).getImm();
5060 // Explicit BTI instruction.
5061 if (Imm == 32 || Imm == 34 || Imm == 36 || Imm == 38)
5062 return true;
5063 // PACI(A|B)SP instructions.
5064 if (Imm == 25 || Imm == 27)
5065 return true;
5066 return false;
5067 }
5068 default:
5069 return false;
5070 }
5071}
5072
5073 bool AArch64InstrInfo::isFpOrNEON(Register Reg) {
5074 if (Reg == 0)
5075 return false;
5076 assert(Reg.isPhysical() && "Expected physical register in isFpOrNEON");
5077 return AArch64::FPR128RegClass.contains(Reg) ||
5078 AArch64::FPR64RegClass.contains(Reg) ||
5079 AArch64::FPR32RegClass.contains(Reg) ||
5080 AArch64::FPR16RegClass.contains(Reg) ||
5081 AArch64::FPR8RegClass.contains(Reg);
5082}
5083
5084 bool AArch64InstrInfo::isFpOrNEON(const MachineInstr &MI) {
5085 auto IsFPR = [&](const MachineOperand &Op) {
5086 if (!Op.isReg())
5087 return false;
5088 auto Reg = Op.getReg();
5089 if (Reg.isPhysical())
5090 return isFpOrNEON(Reg);
5091
5092 const TargetRegisterClass *TRC = ::getRegClass(MI, Reg);
5093 return TRC == &AArch64::FPR128RegClass ||
5094 TRC == &AArch64::FPR128_loRegClass ||
5095 TRC == &AArch64::FPR64RegClass ||
5096 TRC == &AArch64::FPR64_loRegClass ||
5097 TRC == &AArch64::FPR32RegClass || TRC == &AArch64::FPR16RegClass ||
5098 TRC == &AArch64::FPR8RegClass;
5099 };
5100 return llvm::any_of(MI.operands(), IsFPR);
5101}
5102
5103// Scale the unscaled offsets. Returns false if the unscaled offset can't be
5104// scaled.
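// For example, an LDURXi at byte offset 16 becomes element offset 2
// (16 / getMemScale(LDURXi) == 16 / 8).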
5105static bool scaleOffset(unsigned Opc, int64_t &Offset) {
5106 int Scale = AArch64InstrInfo::getMemScale(Opc);
5107
5108 // If the byte-offset isn't a multiple of the stride, we can't scale this
5109 // offset.
5110 if (Offset % Scale != 0)
5111 return false;
5112
5113 // Convert the byte-offset used by unscaled into an "element" offset used
5114 // by the scaled pair load/store instructions.
5115 Offset /= Scale;
5116 return true;
5117}
5118
5119static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
5120 if (FirstOpc == SecondOpc)
5121 return true;
5122 // We can also pair sign-ext and zero-ext instructions.
5123 switch (FirstOpc) {
5124 default:
5125 return false;
5126 case AArch64::STRSui:
5127 case AArch64::STURSi:
5128 return SecondOpc == AArch64::STRSui || SecondOpc == AArch64::STURSi;
5129 case AArch64::STRDui:
5130 case AArch64::STURDi:
5131 return SecondOpc == AArch64::STRDui || SecondOpc == AArch64::STURDi;
5132 case AArch64::STRQui:
5133 case AArch64::STURQi:
5134 return SecondOpc == AArch64::STRQui || SecondOpc == AArch64::STURQi;
5135 case AArch64::STRWui:
5136 case AArch64::STURWi:
5137 return SecondOpc == AArch64::STRWui || SecondOpc == AArch64::STURWi;
5138 case AArch64::STRXui:
5139 case AArch64::STURXi:
5140 return SecondOpc == AArch64::STRXui || SecondOpc == AArch64::STURXi;
5141 case AArch64::LDRSui:
5142 case AArch64::LDURSi:
5143 return SecondOpc == AArch64::LDRSui || SecondOpc == AArch64::LDURSi;
5144 case AArch64::LDRDui:
5145 case AArch64::LDURDi:
5146 return SecondOpc == AArch64::LDRDui || SecondOpc == AArch64::LDURDi;
5147 case AArch64::LDRQui:
5148 case AArch64::LDURQi:
5149 return SecondOpc == AArch64::LDRQui || SecondOpc == AArch64::LDURQi;
5150 case AArch64::LDRWui:
5151 case AArch64::LDURWi:
5152 return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
5153 case AArch64::LDRSWui:
5154 case AArch64::LDURSWi:
5155 return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
5156 case AArch64::LDRXui:
5157 case AArch64::LDURXi:
5158 return SecondOpc == AArch64::LDRXui || SecondOpc == AArch64::LDURXi;
5159 }
5160 // These instructions can't be paired based on their opcodes.
5161 return false;
5162}
5163
5164static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1,
5165 int64_t Offset1, unsigned Opcode1, int FI2,
5166 int64_t Offset2, unsigned Opcode2) {
5167 // Accesses through fixed stack object frame indices may access a different
5168 // fixed stack slot. Check that the object offsets + offsets match.
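// For example, two 8-byte fixed objects at object offsets 0 and 8, each
// accessed with element offset 0, have scaled offsets 0 and 1 and so can be
// clustered.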
5169 if (MFI.isFixedObjectIndex(FI1) && MFI.isFixedObjectIndex(FI2)) {
5170 int64_t ObjectOffset1 = MFI.getObjectOffset(FI1);
5171 int64_t ObjectOffset2 = MFI.getObjectOffset(FI2);
5172 assert(ObjectOffset1 <= ObjectOffset2 && "Object offsets are not ordered.");
5173 // Convert to scaled object offsets.
5174 int Scale1 = AArch64InstrInfo::getMemScale(Opcode1);
5175 if (ObjectOffset1 % Scale1 != 0)
5176 return false;
5177 ObjectOffset1 /= Scale1;
5178 int Scale2 = AArch64InstrInfo::getMemScale(Opcode2);
5179 if (ObjectOffset2 % Scale2 != 0)
5180 return false;
5181 ObjectOffset2 /= Scale2;
5182 ObjectOffset1 += Offset1;
5183 ObjectOffset2 += Offset2;
5184 return ObjectOffset1 + 1 == ObjectOffset2;
5185 }
5186
5187 return FI1 == FI2;
5188}
5189
5190/// Detect opportunities for ldp/stp formation.
5191///
5192/// Only called for LdSt for which getMemOperandWithOffset returns true.
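/// For example, "ldr x1, [x0]" followed by "ldr x2, [x0, #8]" is a candidate
/// for forming "ldp x1, x2, [x0]".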
5193 bool AArch64InstrInfo::shouldClusterMemOps(
5194 ArrayRef<const MachineOperand *> BaseOps1, int64_t OpOffset1,
5195 bool OffsetIsScalable1, ArrayRef<const MachineOperand *> BaseOps2,
5196 int64_t OpOffset2, bool OffsetIsScalable2, unsigned ClusterSize,
5197 unsigned NumBytes) const {
5198 assert(BaseOps1.size() == 1 && BaseOps2.size() == 1);
5199 const MachineOperand &BaseOp1 = *BaseOps1.front();
5200 const MachineOperand &BaseOp2 = *BaseOps2.front();
5201 const MachineInstr &FirstLdSt = *BaseOp1.getParent();
5202 const MachineInstr &SecondLdSt = *BaseOp2.getParent();
5203 if (BaseOp1.getType() != BaseOp2.getType())
5204 return false;
5205
5206 assert((BaseOp1.isReg() || BaseOp1.isFI()) &&
5207 "Only base registers and frame indices are supported.");
5208
5209 // Check for both base regs and base FI.
5210 if (BaseOp1.isReg() && BaseOp1.getReg() != BaseOp2.getReg())
5211 return false;
5212
5213 // Only cluster up to a single pair.
5214 if (ClusterSize > 2)
5215 return false;
5216
5217 if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
5218 return false;
5219
5220 // Can we pair these instructions based on their opcodes?
5221 unsigned FirstOpc = FirstLdSt.getOpcode();
5222 unsigned SecondOpc = SecondLdSt.getOpcode();
5223 if (!canPairLdStOpc(FirstOpc, SecondOpc))
5224 return false;
5225
5226 // Can't merge volatiles or load/stores that have a hint to avoid pair
5227 // formation, for example.
5228 if (!isCandidateToMergeOrPair(FirstLdSt) ||
5229 !isCandidateToMergeOrPair(SecondLdSt))
5230 return false;
5231
5232 // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
5233 int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
5234 if (hasUnscaledLdStOffset(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
5235 return false;
5236
5237 int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
5238 if (hasUnscaledLdStOffset(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
5239 return false;
5240
5241 // Pairwise instructions have a 7-bit signed offset field.
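// That is, the scaled element offset must lie in [-64, 63].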
5242 if (Offset1 > 63 || Offset1 < -64)
5243 return false;
5244
5245 // The caller should already have ordered First/SecondLdSt by offset.
5246 // Note: except for non-equal frame index bases
5247 if (BaseOp1.isFI()) {
5248 assert((!BaseOp1.isIdenticalTo(BaseOp2) || Offset1 <= Offset2) &&
5249 "Caller should have ordered offsets.");
5250
5251 const MachineFrameInfo &MFI =
5252 FirstLdSt.getParent()->getParent()->getFrameInfo();
5253 return shouldClusterFI(MFI, BaseOp1.getIndex(), Offset1, FirstOpc,
5254 BaseOp2.getIndex(), Offset2, SecondOpc);
5255 }
5256
5257 assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
5258
5259 return Offset1 + 1 == Offset2;
5260}
5261
5262 static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
5263 MCRegister Reg, unsigned SubIdx,
5264 unsigned State,
5265 const TargetRegisterInfo *TRI) {
5266 if (!SubIdx)
5267 return MIB.addReg(Reg, State);
5268
5269 if (Reg.isPhysical())
5270 return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
5271 return MIB.addReg(Reg, State, SubIdx);
5272}
5273
5274static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
5275 unsigned NumRegs) {
5276 // We really want the positive remainder mod 32 here; that happens to be
5277 // easily obtainable with a mask.
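// For example, copying a D1_D2 tuple from D0_D1 gives (1 - 0) & 0x1f == 1,
// which is < 2, so copying sub-register 0 first would clobber D1 before it is
// read as a source; the copy must then be done in reverse order.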
5278 return ((DestReg - SrcReg) & 0x1f) < NumRegs;
5279}
5280
5281 void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
5282 MachineBasicBlock::iterator I,
5283 const DebugLoc &DL, MCRegister DestReg,
5284 MCRegister SrcReg, bool KillSrc,
5285 unsigned Opcode,
5286 ArrayRef<unsigned> Indices) const {
5287 assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
5288 const TargetRegisterInfo *TRI = &getRegisterInfo();
5289 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
5290 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
5291 unsigned NumRegs = Indices.size();
5292
5293 int SubReg = 0, End = NumRegs, Incr = 1;
5294 if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
5295 SubReg = NumRegs - 1;
5296 End = -1;
5297 Incr = -1;
5298 }
5299
5300 for (; SubReg != End; SubReg += Incr) {
5301 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
5302 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
5303 AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
5304 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
5305 }
5306}
5307
5308 void AArch64InstrInfo::copyGPRRegTuple(MachineBasicBlock &MBB,
5309 MachineBasicBlock::iterator I,
5310 const DebugLoc &DL, MCRegister DestReg,
5311 MCRegister SrcReg, bool KillSrc,
5312 unsigned Opcode, unsigned ZeroReg,
5313 llvm::ArrayRef<unsigned> Indices) const {
5314 const TargetRegisterInfo *TRI = &getRegisterInfo();
5315 unsigned NumRegs = Indices.size();
5316
5317#ifndef NDEBUG
5318 uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
5319 uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
5320 assert(DestEncoding % NumRegs == 0 && SrcEncoding % NumRegs == 0 &&
5321 "GPR reg sequences should not be able to overlap");
5322#endif
5323
5324 for (unsigned SubReg = 0; SubReg != NumRegs; ++SubReg) {
5325 const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
5326 AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
5327 MIB.addReg(ZeroReg);
5328 AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
5329 MIB.addImm(0);
5330 }
5331}
5332
5333 void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
5334 MachineBasicBlock::iterator I,
5335 const DebugLoc &DL, Register DestReg,
5336 Register SrcReg, bool KillSrc,
5337 bool RenamableDest,
5338 bool RenamableSrc) const {
5339 if (AArch64::GPR32spRegClass.contains(DestReg) &&
5340 AArch64::GPR32spRegClass.contains(SrcReg)) {
5341 if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
5342 // If either operand is WSP, expand to ADD #0.
5343 if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5344 !Subtarget.hasZeroCycleRegMoveGPR32()) {
5345 // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
5346 MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32,
5347 &AArch64::GPR64spRegClass);
5348 MCRegister SrcRegX = RI.getMatchingSuperReg(SrcReg, AArch64::sub_32,
5349 &AArch64::GPR64spRegClass);
5350 // This instruction is reading and writing X registers. This may upset
5351 // the register scavenger and machine verifier, so we need to indicate
5352 // that we are reading an undefined value from SrcRegX, but a proper
5353 // value from SrcReg.
5354 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
5355 .addReg(SrcRegX, RegState::Undef)
5356 .addImm(0)
5357 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
5358 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5359 } else {
5360 BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
5361 .addReg(SrcReg, getKillRegState(KillSrc))
5362 .addImm(0)
5363 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5364 }
5365 } else if (Subtarget.hasZeroCycleRegMoveGPR64() &&
5366 !Subtarget.hasZeroCycleRegMoveGPR32()) {
5367 // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
5368 MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32,
5369 &AArch64::GPR64spRegClass);
5370 assert(DestRegX.isValid() && "Destination super-reg not valid");
5371 MCRegister SrcRegX = RI.getMatchingSuperReg(SrcReg, AArch64::sub_32,
5372 &AArch64::GPR64spRegClass);
5373 assert(SrcRegX.isValid() && "Source super-reg not valid");
5374 // This instruction is reading and writing X registers. This may upset
5375 // the register scavenger and machine verifier, so we need to indicate
5376 // that we are reading an undefined value from SrcRegX, but a proper
5377 // value from SrcReg.
5378 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
5379 .addReg(AArch64::XZR)
5380 .addReg(SrcRegX, RegState::Undef)
5381 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5382 } else {
5383 // Otherwise, expand to ORR WZR.
5384 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
5385 .addReg(AArch64::WZR)
5386 .addReg(SrcReg, getKillRegState(KillSrc));
5387 }
5388 return;
5389 }
5390
5391 // GPR32 zeroing
5392 if (AArch64::GPR32spRegClass.contains(DestReg) && SrcReg == AArch64::WZR) {
5393 if (Subtarget.hasZeroCycleZeroingGPR64() &&
5394 !Subtarget.hasZeroCycleZeroingGPR32()) {
5395 MCRegister DestRegX = RI.getMatchingSuperReg(DestReg, AArch64::sub_32,
5396 &AArch64::GPR64spRegClass);
5397 assert(DestRegX.isValid() && "Destination super-reg not valid");
5398 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestRegX)
5399 .addImm(0)
5400 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5401 } else if (Subtarget.hasZeroCycleZeroingGPR32()) {
5402 BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
5403 .addImm(0)
5404 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5405 } else {
5406 BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
5407 .addReg(AArch64::WZR)
5408 .addReg(AArch64::WZR);
5409 }
5410 return;
5411 }
5412
5413 if (AArch64::GPR64spRegClass.contains(DestReg) &&
5414 AArch64::GPR64spRegClass.contains(SrcReg)) {
5415 if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
5416 // If either operand is SP, expand to ADD #0.
5417 BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
5418 .addReg(SrcReg, getKillRegState(KillSrc))
5419 .addImm(0)
5420 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5421 } else {
5422 // Otherwise, expand to ORR XZR.
5423 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
5424 .addReg(AArch64::XZR)
5425 .addReg(SrcReg, getKillRegState(KillSrc));
5426 }
5427 return;
5428 }
5429
5430 // GPR64 zeroing
5431 if (AArch64::GPR64spRegClass.contains(DestReg) && SrcReg == AArch64::XZR) {
5432 if (Subtarget.hasZeroCycleZeroingGPR64()) {
5433 BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
5434 .addImm(0)
5435 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
5436 } else {
5437 BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
5438 .addReg(AArch64::XZR)
5439 .addReg(AArch64::XZR);
5440 }
5441 return;
5442 }
5443
5444 // Copy a Predicate register by ORRing with itself.
5445 if (AArch64::PPRRegClass.contains(DestReg) &&
5446 AArch64::PPRRegClass.contains(SrcReg)) {
5447 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5448 "Unexpected SVE register.");
5449 BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), DestReg)
5450 .addReg(SrcReg) // Pg
5451 .addReg(SrcReg)
5452 .addReg(SrcReg, getKillRegState(KillSrc));
5453 return;
5454 }
5455
5456 // Copy a predicate-as-counter register by ORRing with itself as if it
5457 // were a regular predicate (mask) register.
5458 bool DestIsPNR = AArch64::PNRRegClass.contains(DestReg);
5459 bool SrcIsPNR = AArch64::PNRRegClass.contains(SrcReg);
5460 if (DestIsPNR || SrcIsPNR) {
5461 auto ToPPR = [](MCRegister R) -> MCRegister {
5462 return (R - AArch64::PN0) + AArch64::P0;
5463 };
5464 MCRegister PPRSrcReg = SrcIsPNR ? ToPPR(SrcReg) : SrcReg.asMCReg();
5465 MCRegister PPRDestReg = DestIsPNR ? ToPPR(DestReg) : DestReg.asMCReg();
5466
5467 if (PPRSrcReg != PPRDestReg) {
5468 auto NewMI = BuildMI(MBB, I, DL, get(AArch64::ORR_PPzPP), PPRDestReg)
5469 .addReg(PPRSrcReg) // Pg
5470 .addReg(PPRSrcReg)
5471 .addReg(PPRSrcReg, getKillRegState(KillSrc));
5472 if (DestIsPNR)
5473 NewMI.addDef(DestReg, RegState::Implicit);
5474 }
5475 return;
5476 }
5477
5478 // Copy a Z register by ORRing with itself.
5479 if (AArch64::ZPRRegClass.contains(DestReg) &&
5480 AArch64::ZPRRegClass.contains(SrcReg)) {
5481 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5482 "Unexpected SVE register.");
5483 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ), DestReg)
5484 .addReg(SrcReg)
5485 .addReg(SrcReg, getKillRegState(KillSrc));
5486 return;
5487 }
5488
5489 // Copy a Z register pair by copying the individual sub-registers.
5490 if ((AArch64::ZPR2RegClass.contains(DestReg) ||
5491 AArch64::ZPR2StridedOrContiguousRegClass.contains(DestReg)) &&
5492 (AArch64::ZPR2RegClass.contains(SrcReg) ||
5493 AArch64::ZPR2StridedOrContiguousRegClass.contains(SrcReg))) {
5494 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5495 "Unexpected SVE register.");
5496 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1};
5497 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5498 Indices);
5499 return;
5500 }
5501
5502 // Copy a Z register triple by copying the individual sub-registers.
5503 if (AArch64::ZPR3RegClass.contains(DestReg) &&
5504 AArch64::ZPR3RegClass.contains(SrcReg)) {
5505 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5506 "Unexpected SVE register.");
5507 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5508 AArch64::zsub2};
5509 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5510 Indices);
5511 return;
5512 }
5513
5514 // Copy a Z register quad by copying the individual sub-registers.
5515 if ((AArch64::ZPR4RegClass.contains(DestReg) ||
5516 AArch64::ZPR4StridedOrContiguousRegClass.contains(DestReg)) &&
5517 (AArch64::ZPR4RegClass.contains(SrcReg) ||
5518 AArch64::ZPR4StridedOrContiguousRegClass.contains(SrcReg))) {
5519 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5520 "Unexpected SVE register.");
5521 static const unsigned Indices[] = {AArch64::zsub0, AArch64::zsub1,
5522 AArch64::zsub2, AArch64::zsub3};
5523 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORR_ZZZ,
5524 Indices);
5525 return;
5526 }
5527
5528 // Copy a DDDD register quad by copying the individual sub-registers.
5529 if (AArch64::DDDDRegClass.contains(DestReg) &&
5530 AArch64::DDDDRegClass.contains(SrcReg)) {
5531 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5532 AArch64::dsub2, AArch64::dsub3};
5533 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5534 Indices);
5535 return;
5536 }
5537
5538 // Copy a DDD register triple by copying the individual sub-registers.
5539 if (AArch64::DDDRegClass.contains(DestReg) &&
5540 AArch64::DDDRegClass.contains(SrcReg)) {
5541 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
5542 AArch64::dsub2};
5543 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5544 Indices);
5545 return;
5546 }
5547
5548 // Copy a DD register pair by copying the individual sub-registers.
5549 if (AArch64::DDRegClass.contains(DestReg) &&
5550 AArch64::DDRegClass.contains(SrcReg)) {
5551 static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
5552 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
5553 Indices);
5554 return;
5555 }
5556
5557 // Copy a QQQQ register quad by copying the individual sub-registers.
5558 if (AArch64::QQQQRegClass.contains(DestReg) &&
5559 AArch64::QQQQRegClass.contains(SrcReg)) {
5560 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5561 AArch64::qsub2, AArch64::qsub3};
5562 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5563 Indices);
5564 return;
5565 }
5566
5567 // Copy a QQQ register triple by copying the individual sub-registers.
5568 if (AArch64::QQQRegClass.contains(DestReg) &&
5569 AArch64::QQQRegClass.contains(SrcReg)) {
5570 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
5571 AArch64::qsub2};
5572 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5573 Indices);
5574 return;
5575 }
5576
5577 // Copy a QQ register pair by copying the individual sub-registers.
5578 if (AArch64::QQRegClass.contains(DestReg) &&
5579 AArch64::QQRegClass.contains(SrcReg)) {
5580 static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
5581 copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
5582 Indices);
5583 return;
5584 }
5585
5586 if (AArch64::XSeqPairsClassRegClass.contains(DestReg) &&
5587 AArch64::XSeqPairsClassRegClass.contains(SrcReg)) {
5588 static const unsigned Indices[] = {AArch64::sube64, AArch64::subo64};
5589 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRXrs,
5590 AArch64::XZR, Indices);
5591 return;
5592 }
5593
5594 if (AArch64::WSeqPairsClassRegClass.contains(DestReg) &&
5595 AArch64::WSeqPairsClassRegClass.contains(SrcReg)) {
5596 static const unsigned Indices[] = {AArch64::sube32, AArch64::subo32};
5597 copyGPRRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRWrs,
5598 AArch64::WZR, Indices);
5599 return;
5600 }
5601
5602 if (AArch64::FPR128RegClass.contains(DestReg) &&
5603 AArch64::FPR128RegClass.contains(SrcReg)) {
5604 if (Subtarget.isSVEorStreamingSVEAvailable() &&
5605 !Subtarget.isNeonAvailable())
5606 BuildMI(MBB, I, DL, get(AArch64::ORR_ZZZ))
5607 .addReg(AArch64::Z0 + (DestReg - AArch64::Q0), RegState::Define)
5608 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0))
5609 .addReg(AArch64::Z0 + (SrcReg - AArch64::Q0));
5610 else if (Subtarget.isNeonAvailable())
5611 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
5612 .addReg(SrcReg)
5613 .addReg(SrcReg, getKillRegState(KillSrc));
5614 else {
5615 BuildMI(MBB, I, DL, get(AArch64::STRQpre))
5616 .addReg(AArch64::SP, RegState::Define)
5617 .addReg(SrcReg, getKillRegState(KillSrc))
5618 .addReg(AArch64::SP)
5619 .addImm(-16);
5620 BuildMI(MBB, I, DL, get(AArch64::LDRQpost))
5621 .addReg(AArch64::SP, RegState::Define)
5622 .addReg(DestReg, RegState::Define)
5623 .addReg(AArch64::SP)
5624 .addImm(16);
5625 }
5626 return;
5627 }
5628
5629 if (AArch64::FPR64RegClass.contains(DestReg) &&
5630 AArch64::FPR64RegClass.contains(SrcReg)) {
5631 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5632 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5633 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5634 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
5635 &AArch64::FPR128RegClass);
5636 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
5637 &AArch64::FPR128RegClass);
5638 // This instruction is reading and writing Q registers. This may upset
5639 // the register scavenger and machine verifier, so we need to indicate
5640 // that we are reading an undefined value from SrcRegQ, but a proper
5641 // value from SrcReg.
5642 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5643 .addReg(SrcRegQ, RegState::Undef)
5644 .addReg(SrcRegQ, RegState::Undef)
5645 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5646 } else {
5647 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
5648 .addReg(SrcReg, getKillRegState(KillSrc));
5649 }
5650 return;
5651 }
5652
5653 if (AArch64::FPR32RegClass.contains(DestReg) &&
5654 AArch64::FPR32RegClass.contains(SrcReg)) {
5655 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5656 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5657 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5658 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
5659 &AArch64::FPR128RegClass);
5660 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
5661 &AArch64::FPR128RegClass);
5662 // This instruction is reading and writing Q registers. This may upset
5663 // the register scavenger and machine verifier, so we need to indicate
5664 // that we are reading an undefined value from SrcRegQ, but a proper
5665 // value from SrcReg.
5666 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5667 .addReg(SrcRegQ, RegState::Undef)
5668 .addReg(SrcRegQ, RegState::Undef)
5669 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5670 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5671 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5672 MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
5673 &AArch64::FPR64RegClass);
5674 MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
5675 &AArch64::FPR64RegClass);
5676 // This instruction is reading and writing D registers. This may upset
5677 // the register scavenger and machine verifier, so we need to indicate
5678 // that we are reading an undefined value from SrcRegD, but a proper
5679 // value from SrcReg.
5680 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5681 .addReg(SrcRegD, RegState::Undef)
5682 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5683 } else {
5684 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5685 .addReg(SrcReg, getKillRegState(KillSrc));
5686 }
5687 return;
5688 }
5689
5690 if (AArch64::FPR16RegClass.contains(DestReg) &&
5691 AArch64::FPR16RegClass.contains(SrcReg)) {
5692 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5693 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5694 !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5695 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
5696 &AArch64::FPR128RegClass);
5697 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
5698 &AArch64::FPR128RegClass);
5699 // This instruction is reading and writing Q registers. This may upset
5700 // the register scavenger and machine verifier, so we need to indicate
5701 // that we are reading an undefined value from SrcRegQ, but a proper
5702 // value from SrcReg.
5703 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5704 .addReg(SrcRegQ, RegState::Undef)
5705 .addReg(SrcRegQ, RegState::Undef)
5706 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5707 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5708 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5709 MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
5710 &AArch64::FPR64RegClass);
5711 MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
5712 &AArch64::FPR64RegClass);
5713 // This instruction is reading and writing D registers. This may upset
5714 // the register scavenger and machine verifier, so we need to indicate
5715 // that we are reading an undefined value from SrcRegD, but a proper
5716 // value from SrcReg.
5717 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5718 .addReg(SrcRegD, RegState::Undef)
5719 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5720 } else {
5721 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
5722 &AArch64::FPR32RegClass);
5723 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
5724 &AArch64::FPR32RegClass);
5725 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5726 .addReg(SrcReg, getKillRegState(KillSrc));
5727 }
5728 return;
5729 }
5730
5731 if (AArch64::FPR8RegClass.contains(DestReg) &&
5732 AArch64::FPR8RegClass.contains(SrcReg)) {
5733 if (Subtarget.hasZeroCycleRegMoveFPR128() &&
5734 !Subtarget.hasZeroCycleRegMoveFPR64() &&
5735         !Subtarget.hasZeroCycleRegMoveFPR32() && Subtarget.isNeonAvailable()) {
5736 MCRegister DestRegQ = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
5737 &AArch64::FPR128RegClass);
5738 MCRegister SrcRegQ = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
5739 &AArch64::FPR128RegClass);
5740 // This instruction is reading and writing Q registers. This may upset
5741 // the register scavenger and machine verifier, so we need to indicate
5742 // that we are reading an undefined value from SrcRegQ, but a proper
5743 // value from SrcReg.
5744 BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestRegQ)
5745 .addReg(SrcRegQ, RegState::Undef)
5746 .addReg(SrcRegQ, RegState::Undef)
5747 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5748 } else if (Subtarget.hasZeroCycleRegMoveFPR64() &&
5749 !Subtarget.hasZeroCycleRegMoveFPR32()) {
5750 MCRegister DestRegD = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
5751 &AArch64::FPR64RegClass);
5752 MCRegister SrcRegD = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
5753 &AArch64::FPR64RegClass);
5754 // This instruction is reading and writing D registers. This may upset
5755 // the register scavenger and machine verifier, so we need to indicate
5756 // that we are reading an undefined value from SrcRegD, but a proper
5757 // value from SrcReg.
5758 BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestRegD)
5759 .addReg(SrcRegD, RegState::Undef)
5760 .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
5761 } else {
5762 DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
5763 &AArch64::FPR32RegClass);
5764 SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
5765 &AArch64::FPR32RegClass);
5766 BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
5767 .addReg(SrcReg, getKillRegState(KillSrc));
5768 }
5769 return;
5770 }
5771
5772 // Copies between GPR64 and FPR64.
5773 if (AArch64::FPR64RegClass.contains(DestReg) &&
5774 AArch64::GPR64RegClass.contains(SrcReg)) {
5775 if (AArch64::XZR == SrcReg) {
5776 BuildMI(MBB, I, DL, get(AArch64::FMOVD0), DestReg);
5777 } else {
5778 BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
5779 .addReg(SrcReg, getKillRegState(KillSrc));
5780 }
5781 return;
5782 }
5783 if (AArch64::GPR64RegClass.contains(DestReg) &&
5784 AArch64::FPR64RegClass.contains(SrcReg)) {
5785 BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
5786 .addReg(SrcReg, getKillRegState(KillSrc));
5787 return;
5788 }
5789 // Copies between GPR32 and FPR32.
5790 if (AArch64::FPR32RegClass.contains(DestReg) &&
5791 AArch64::GPR32RegClass.contains(SrcReg)) {
5792 if (AArch64::WZR == SrcReg) {
5793 BuildMI(MBB, I, DL, get(AArch64::FMOVS0), DestReg);
5794 } else {
5795 BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
5796 .addReg(SrcReg, getKillRegState(KillSrc));
5797 }
5798 return;
5799 }
5800 if (AArch64::GPR32RegClass.contains(DestReg) &&
5801 AArch64::FPR32RegClass.contains(SrcReg)) {
5802 BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
5803 .addReg(SrcReg, getKillRegState(KillSrc));
5804 return;
5805 }
5806
5807 if (DestReg == AArch64::NZCV) {
5808 assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
5809 BuildMI(MBB, I, DL, get(AArch64::MSR))
5810 .addImm(AArch64SysReg::NZCV)
5811 .addReg(SrcReg, getKillRegState(KillSrc))
5812 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
5813 return;
5814 }
5815
5816 if (SrcReg == AArch64::NZCV) {
5817 assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
5818 BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
5819 .addImm(AArch64SysReg::NZCV)
5820 .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
5821 return;
5822 }
5823
5824#ifndef NDEBUG
5825 errs() << RI.getRegAsmName(DestReg) << " = COPY " << RI.getRegAsmName(SrcReg)
5826 << "\n";
5827#endif
5828 llvm_unreachable("unimplemented reg-to-reg copy");
5829}
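// Illustrative lowering sketch (register names chosen for the example): when
// copying one FPR128 register to another with neither NEON nor SVE usable,
// the STRQpre/LDRQpost fallback above bounces the value through the stack,
// emitting roughly:
//   str q1, [sp, #-16]!
//   ldr q0, [sp], #16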
5830
5833 MachineBasicBlock::iterator InsertBefore,
5834 const MCInstrDesc &MCID,
5835 Register SrcReg, bool IsKill,
5836 unsigned SubIdx0, unsigned SubIdx1, int FI,
5837 MachineMemOperand *MMO) {
5838 Register SrcReg0 = SrcReg;
5839 Register SrcReg1 = SrcReg;
5840 if (SrcReg.isPhysical()) {
5841 SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
5842 SubIdx0 = 0;
5843 SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
5844 SubIdx1 = 0;
5845 }
5846 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
5847 .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
5848 .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
5849 .addFrameIndex(FI)
5850 .addImm(0)
5851 .addMemOperand(MMO);
5852}
5853
5856 Register SrcReg, bool isKill, int FI,
5857 const TargetRegisterClass *RC,
5858 Register VReg,
5859 MachineInstr::MIFlag Flags) const {
5860 MachineFunction &MF = *MBB.getParent();
5861 MachineFrameInfo &MFI = MF.getFrameInfo();
5862
5864 MachineMemOperand *MMO =
5866 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
5867 unsigned Opc = 0;
5868 bool Offset = true;
5870 unsigned StackID = TargetStackID::Default;
5871 switch (RI.getSpillSize(*RC)) {
5872 case 1:
5873 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
5874 Opc = AArch64::STRBui;
5875 break;
5876 case 2: {
5877 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
5878 Opc = AArch64::STRHui;
5879 else if (AArch64::PNRRegClass.hasSubClassEq(RC) ||
5880 AArch64::PPRRegClass.hasSubClassEq(RC)) {
5881 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5882 "Unexpected register store without SVE store instructions");
5883 Opc = AArch64::STR_PXI;
5885 }
5886 break;
5887 }
5888 case 4:
5889 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
5890 Opc = AArch64::STRWui;
5891 if (SrcReg.isVirtual())
5892 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
5893 else
5894 assert(SrcReg != AArch64::WSP);
5895 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
5896 Opc = AArch64::STRSui;
5897 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
5898 Opc = AArch64::STR_PPXI;
5900 }
5901 break;
5902 case 8:
5903 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
5904 Opc = AArch64::STRXui;
5905 if (SrcReg.isVirtual())
5906 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
5907 else
5908 assert(SrcReg != AArch64::SP);
5909 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
5910 Opc = AArch64::STRDui;
5911 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
5913 get(AArch64::STPWi), SrcReg, isKill,
5914 AArch64::sube32, AArch64::subo32, FI, MMO);
5915 return;
5916 }
5917 break;
5918 case 16:
5919 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
5920 Opc = AArch64::STRQui;
5921 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
5922 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5923 Opc = AArch64::ST1Twov1d;
5924 Offset = false;
5925 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
5927 get(AArch64::STPXi), SrcReg, isKill,
5928 AArch64::sube64, AArch64::subo64, FI, MMO);
5929 return;
5930 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
5931 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5932 "Unexpected register store without SVE store instructions");
5933 Opc = AArch64::STR_ZXI;
5935 }
5936 break;
5937 case 24:
5938 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
5939 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5940 Opc = AArch64::ST1Threev1d;
5941 Offset = false;
5942 }
5943 break;
5944 case 32:
5945 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
5946 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5947 Opc = AArch64::ST1Fourv1d;
5948 Offset = false;
5949 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
5950 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5951 Opc = AArch64::ST1Twov2d;
5952 Offset = false;
5953 } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5954 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5955 "Unexpected register store without SVE store instructions");
5956 Opc = AArch64::STR_ZZXI_STRIDED_CONTIGUOUS;
5958 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
5959 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5960 "Unexpected register store without SVE store instructions");
5961 Opc = AArch64::STR_ZZXI;
5963 }
5964 break;
5965 case 48:
5966 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
5967 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5968 Opc = AArch64::ST1Threev2d;
5969 Offset = false;
5970 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
5971 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5972 "Unexpected register store without SVE store instructions");
5973 Opc = AArch64::STR_ZZZXI;
5975 }
5976 break;
5977 case 64:
5978 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
5979 assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
5980 Opc = AArch64::ST1Fourv2d;
5981 Offset = false;
5982 } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
5983 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5984 "Unexpected register store without SVE store instructions");
5985 Opc = AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS;
5987 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
5988 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
5989 "Unexpected register store without SVE store instructions");
5990 Opc = AArch64::STR_ZZZZXI;
5992 }
5993 break;
5994 }
5995 assert(Opc && "Unknown register class");
5996 MFI.setStackID(FI, StackID);
5997
5999 .addReg(SrcReg, getKillRegState(isKill))
6000 .addFrameIndex(FI);
6001
6002 if (Offset)
6003 MI.addImm(0);
6004 if (PNRReg.isValid())
6005 MI.addDef(PNRReg, RegState::Implicit);
6006 MI.addMemOperand(MMO);
6007}
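// For example, the spill-size switch above keys off RI.getSpillSize(*RC): a
// plain GPR64 spill selects STRXui with an immediate offset of 0, a single
// ZPR spill selects STR_ZXI, and NEON tuple classes such as QQ fall back to
// ST1 store forms that take no immediate offset (Offset == false).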
6008
6011 MachineBasicBlock::iterator InsertBefore,
6012 const MCInstrDesc &MCID,
6013 Register DestReg, unsigned SubIdx0,
6014 unsigned SubIdx1, int FI,
6015 MachineMemOperand *MMO) {
6016 Register DestReg0 = DestReg;
6017 Register DestReg1 = DestReg;
6018 bool IsUndef = true;
6019 if (DestReg.isPhysical()) {
6020 DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
6021 SubIdx0 = 0;
6022 DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
6023 SubIdx1 = 0;
6024 IsUndef = false;
6025 }
6026 BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
6027 .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
6028 .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
6029 .addFrameIndex(FI)
6030 .addImm(0)
6031 .addMemOperand(MMO);
6032}
6033
6036 Register DestReg, int FI,
6037 const TargetRegisterClass *RC,
6038 Register VReg,
6039 MachineInstr::MIFlag Flags) const {
6040 MachineFunction &MF = *MBB.getParent();
6041 MachineFrameInfo &MFI = MF.getFrameInfo();
6043 MachineMemOperand *MMO =
6045 MFI.getObjectSize(FI), MFI.getObjectAlign(FI));
6046
6047 unsigned Opc = 0;
6048 bool Offset = true;
6049 unsigned StackID = TargetStackID::Default;
6051 switch (TRI.getSpillSize(*RC)) {
6052 case 1:
6053 if (AArch64::FPR8RegClass.hasSubClassEq(RC))
6054 Opc = AArch64::LDRBui;
6055 break;
6056 case 2: {
6057 bool IsPNR = AArch64::PNRRegClass.hasSubClassEq(RC);
6058 if (AArch64::FPR16RegClass.hasSubClassEq(RC))
6059 Opc = AArch64::LDRHui;
6060 else if (IsPNR || AArch64::PPRRegClass.hasSubClassEq(RC)) {
6061 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6062 "Unexpected register load without SVE load instructions");
6063 if (IsPNR)
6064 PNRReg = DestReg;
6065 Opc = AArch64::LDR_PXI;
6067 }
6068 break;
6069 }
6070 case 4:
6071 if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
6072 Opc = AArch64::LDRWui;
6073 if (DestReg.isVirtual())
6074 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
6075 else
6076 assert(DestReg != AArch64::WSP);
6077 } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
6078 Opc = AArch64::LDRSui;
6079 else if (AArch64::PPR2RegClass.hasSubClassEq(RC)) {
6080 Opc = AArch64::LDR_PPXI;
6082 }
6083 break;
6084 case 8:
6085 if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
6086 Opc = AArch64::LDRXui;
6087 if (DestReg.isVirtual())
6088 MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
6089 else
6090 assert(DestReg != AArch64::SP);
6091 } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
6092 Opc = AArch64::LDRDui;
6093 } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
6095 get(AArch64::LDPWi), DestReg, AArch64::sube32,
6096 AArch64::subo32, FI, MMO);
6097 return;
6098 }
6099 break;
6100 case 16:
6101 if (AArch64::FPR128RegClass.hasSubClassEq(RC))
6102 Opc = AArch64::LDRQui;
6103 else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
6104 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6105 Opc = AArch64::LD1Twov1d;
6106 Offset = false;
6107 } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
6109 get(AArch64::LDPXi), DestReg, AArch64::sube64,
6110 AArch64::subo64, FI, MMO);
6111 return;
6112 } else if (AArch64::ZPRRegClass.hasSubClassEq(RC)) {
6113 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6114 "Unexpected register load without SVE load instructions");
6115 Opc = AArch64::LDR_ZXI;
6117 }
6118 break;
6119 case 24:
6120 if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
6121 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6122 Opc = AArch64::LD1Threev1d;
6123 Offset = false;
6124 }
6125 break;
6126 case 32:
6127 if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
6128 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6129 Opc = AArch64::LD1Fourv1d;
6130 Offset = false;
6131 } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
6132 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6133 Opc = AArch64::LD1Twov2d;
6134 Offset = false;
6135 } else if (AArch64::ZPR2StridedOrContiguousRegClass.hasSubClassEq(RC)) {
6136 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6137 "Unexpected register load without SVE load instructions");
6138 Opc = AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS;
6140 } else if (AArch64::ZPR2RegClass.hasSubClassEq(RC)) {
6141 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6142 "Unexpected register load without SVE load instructions");
6143 Opc = AArch64::LDR_ZZXI;
6145 }
6146 break;
6147 case 48:
6148 if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
6149 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6150 Opc = AArch64::LD1Threev2d;
6151 Offset = false;
6152 } else if (AArch64::ZPR3RegClass.hasSubClassEq(RC)) {
6153 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6154 "Unexpected register load without SVE load instructions");
6155 Opc = AArch64::LDR_ZZZXI;
6157 }
6158 break;
6159 case 64:
6160 if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
6161 assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
6162 Opc = AArch64::LD1Fourv2d;
6163 Offset = false;
6164 } else if (AArch64::ZPR4StridedOrContiguousRegClass.hasSubClassEq(RC)) {
6165 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6166 "Unexpected register load without SVE load instructions");
6167 Opc = AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS;
6169 } else if (AArch64::ZPR4RegClass.hasSubClassEq(RC)) {
6170 assert(Subtarget.isSVEorStreamingSVEAvailable() &&
6171 "Unexpected register load without SVE load instructions");
6172 Opc = AArch64::LDR_ZZZZXI;
6174 }
6175 break;
6176 }
6177
6178 assert(Opc && "Unknown register class");
6179 MFI.setStackID(FI, StackID);
6180
6182 .addReg(DestReg, getDefRegState(true))
6183 .addFrameIndex(FI);
6184 if (Offset)
6185 MI.addImm(0);
6186 if (PNRReg.isValid() && !PNRReg.isVirtual())
6187 MI.addDef(PNRReg, RegState::Implicit);
6188 MI.addMemOperand(MMO);
6189}
6190
6192 const MachineInstr &UseMI,
6193 const TargetRegisterInfo *TRI) {
6194 return any_of(instructionsWithoutDebug(std::next(DefMI.getIterator()),
6195 UseMI.getIterator()),
6196 [TRI](const MachineInstr &I) {
6197 return I.modifiesRegister(AArch64::NZCV, TRI) ||
6198 I.readsRegister(AArch64::NZCV, TRI);
6199 });
6200}
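// Illustrative use (MIR-like sketch, operand details elided):
//   %f = SUBSWrr %a, %b, implicit-def $nzcv   // DefMI
//   %t = ADDWrr %a, %b                        // does not touch NZCV
//   %r = CSELWr %a, %b, 0, implicit $nzcv     // UseMI
// The open range (DefMI, UseMI) contains only the ADD, so the function
// returns false and the flags produced by the SUBS survive to the CSEL.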
6201
6202void AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
6203 const StackOffset &Offset, int64_t &ByteSized, int64_t &VGSized) {
6204   // The smallest scalable element supported by scaled SVE addressing
6205   // modes is a predicate, which is 2 scalable bytes in size. So the scalable
6206 // byte offset must always be a multiple of 2.
6207 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
6208
6209   // VGSized offsets are divided by '2', because the VG register is the
6210   // number of 64-bit granules as opposed to 128-bit vector chunks,
6211 // which is how the 'n' in e.g. MVT::nxv1i8 is modelled.
6212 // So, for a stack offset of 16 MVT::nxv1i8's, the size is n x 16 bytes.
6213 // VG = n * 2 and the dwarf offset must be VG * 8 bytes.
6214 ByteSized = Offset.getFixed();
6215 VGSized = Offset.getScalable() / 2;
6216}
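// Usage sketch, following the definition above:
//   int64_t Bytes, VG;
//   AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
//       StackOffset::get(/*Fixed=*/16, /*Scalable=*/32), Bytes, VG);
//   // Bytes == 16, VG == 16, i.e. the DWARF offset is 16 + 16 * VG.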
6217
6218/// Returns the offset in parts to which this frame offset can be
6219/// decomposed for the purpose of describing a frame offset.
6220/// For non-scalable offsets this is simply its byte size.
6221void AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
6222 const StackOffset &Offset, int64_t &NumBytes, int64_t &NumPredicateVectors,
6223 int64_t &NumDataVectors) {
6224   // The smallest scalable element supported by scaled SVE addressing
6225   // modes is a predicate, which is 2 scalable bytes in size. So the scalable
6226 // byte offset must always be a multiple of 2.
6227 assert(Offset.getScalable() % 2 == 0 && "Invalid frame offset");
6228
6229 NumBytes = Offset.getFixed();
6230 NumDataVectors = 0;
6231 NumPredicateVectors = Offset.getScalable() / 2;
6232 // This method is used to get the offsets to adjust the frame offset.
6233 // If the function requires ADDPL to be used and needs more than two ADDPL
6234 // instructions, part of the offset is folded into NumDataVectors so that it
6235 // uses ADDVL for part of it, reducing the number of ADDPL instructions.
6236 if (NumPredicateVectors % 8 == 0 || NumPredicateVectors < -64 ||
6237 NumPredicateVectors > 62) {
6238 NumDataVectors = NumPredicateVectors / 8;
6239 NumPredicateVectors -= NumDataVectors * 8;
6240 }
6241}
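// For example, a purely scalable offset of 128 bytes first gives
// NumPredicateVectors == 64, which is greater than 62, so it is folded into
// NumDataVectors == 8 and NumPredicateVectors == 0: one ADDVL instead of
// several ADDPLs.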
6242
6243// Convenience function to create a DWARF expression for: Constant `Operation`.
6244 // This helper emits compact sequences for common cases. For example, for `-15
6245// DW_OP_plus`, this helper would create DW_OP_lit15 DW_OP_minus.
6248 if (Operation == dwarf::DW_OP_plus && Constant < 0 && -Constant <= 31) {
6249 // -Constant (1 to 31)
6250 Expr.push_back(dwarf::DW_OP_lit0 - Constant);
6251 Operation = dwarf::DW_OP_minus;
6252 } else if (Constant >= 0 && Constant <= 31) {
6253 // Literal value 0 to 31
6254 Expr.push_back(dwarf::DW_OP_lit0 + Constant);
6255 } else {
6256 // Signed constant
6257 Expr.push_back(dwarf::DW_OP_consts);
6259 }
6260 return Expr.push_back(Operation);
6261}
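// For instance, appendConstantExpr(Expr, -15, dwarf::DW_OP_plus) appends
// DW_OP_lit15, DW_OP_minus, while appendConstantExpr(Expr, 100, dwarf::DW_OP_mul)
// appends DW_OP_consts, SLEB128(100), DW_OP_mul.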
6262
6263// Convenience function to create a DWARF expression for a register.
6264static void appendReadRegExpr(SmallVectorImpl<char> &Expr, unsigned RegNum) {
6265 Expr.push_back((char)dwarf::DW_OP_bregx);
6267 Expr.push_back(0);
6268}
6269
6270// Convenience function to create a DWARF expression for loading a register from
6271// a CFA offset.
6273 int64_t OffsetFromDefCFA) {
6274 // This assumes the top of the DWARF stack contains the CFA.
6275 Expr.push_back(dwarf::DW_OP_dup);
6276 // Add the offset to the register.
6277 appendConstantExpr(Expr, OffsetFromDefCFA, dwarf::DW_OP_plus);
6278   // Dereference the address (loads a 64-bit value).
6279 Expr.push_back(dwarf::DW_OP_deref);
6280}
6281
6282// Convenience function to create a comment for
6283// (+/-) NumBytes (* RegScale)?
6284static void appendOffsetComment(int NumBytes, llvm::raw_string_ostream &Comment,
6285 StringRef RegScale = {}) {
6286 if (NumBytes) {
6287 Comment << (NumBytes < 0 ? " - " : " + ") << std::abs(NumBytes);
6288 if (!RegScale.empty())
6289 Comment << ' ' << RegScale;
6290 }
6291}
6292
6293// Creates an MCCFIInstruction:
6294// { DW_CFA_def_cfa_expression, ULEB128 (sizeof expr), expr }
6296 unsigned Reg,
6297 const StackOffset &Offset) {
6298 int64_t NumBytes, NumVGScaledBytes;
6299 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(Offset, NumBytes,
6300 NumVGScaledBytes);
6301 std::string CommentBuffer;
6302 llvm::raw_string_ostream Comment(CommentBuffer);
6303
6304 if (Reg == AArch64::SP)
6305 Comment << "sp";
6306 else if (Reg == AArch64::FP)
6307 Comment << "fp";
6308 else
6309 Comment << printReg(Reg, &TRI);
6310
6311 // Build up the expression (Reg + NumBytes + VG * NumVGScaledBytes)
6312 SmallString<64> Expr;
6313 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6314 assert(DwarfReg <= 31 && "DwarfReg out of bounds (0..31)");
6315 // Reg + NumBytes
6316 Expr.push_back(dwarf::DW_OP_breg0 + DwarfReg);
6317 appendLEB128<LEB128Sign::Signed>(Expr, NumBytes);
6318 appendOffsetComment(NumBytes, Comment);
6319 if (NumVGScaledBytes) {
6320 // + VG * NumVGScaledBytes
6321 appendOffsetComment(NumVGScaledBytes, Comment, "* VG");
6322 appendReadRegExpr(Expr, TRI.getDwarfRegNum(AArch64::VG, true));
6323 appendConstantExpr(Expr, NumVGScaledBytes, dwarf::DW_OP_mul);
6324 Expr.push_back(dwarf::DW_OP_plus);
6325 }
6326
6327 // Wrap this into DW_CFA_def_cfa.
6328 SmallString<64> DefCfaExpr;
6329 DefCfaExpr.push_back(dwarf::DW_CFA_def_cfa_expression);
6330 appendLEB128<LEB128Sign::Unsigned>(DefCfaExpr, Expr.size());
6331 DefCfaExpr.append(Expr.str());
6332 return MCCFIInstruction::createEscape(nullptr, DefCfaExpr.str(), SMLoc(),
6333 Comment.str());
6334}
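// For example, with Reg == SP and a CFA that is 16 bytes plus two SVE vectors
// above SP (StackOffset 16 fixed, 32 scalable), the escape encodes roughly
//   DW_OP_breg31 16, DW_OP_bregx VG 0, DW_OP_lit16, DW_OP_mul, DW_OP_plus
// and carries the comment "sp + 16 + 16 * VG".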
6335
6337 unsigned FrameReg, unsigned Reg,
6338 const StackOffset &Offset,
6339 bool LastAdjustmentWasScalable) {
6340 if (Offset.getScalable())
6341 return createDefCFAExpression(TRI, Reg, Offset);
6342
6343 if (FrameReg == Reg && !LastAdjustmentWasScalable)
6344 return MCCFIInstruction::cfiDefCfaOffset(nullptr, int(Offset.getFixed()));
6345
6346 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6347 return MCCFIInstruction::cfiDefCfa(nullptr, DwarfReg, (int)Offset.getFixed());
6348}
6349
6352 const StackOffset &OffsetFromDefCFA,
6353 std::optional<int64_t> IncomingVGOffsetFromDefCFA) {
6354 int64_t NumBytes, NumVGScaledBytes;
6355 AArch64InstrInfo::decomposeStackOffsetForDwarfOffsets(
6356 OffsetFromDefCFA, NumBytes, NumVGScaledBytes);
6357
6358 unsigned DwarfReg = TRI.getDwarfRegNum(Reg, true);
6359
6360 // Non-scalable offsets can use DW_CFA_offset directly.
6361 if (!NumVGScaledBytes)
6362 return MCCFIInstruction::createOffset(nullptr, DwarfReg, NumBytes);
6363
6364 std::string CommentBuffer;
6365 llvm::raw_string_ostream Comment(CommentBuffer);
6366 Comment << printReg(Reg, &TRI) << " @ cfa";
6367
6368 // Build up expression (CFA + VG * NumVGScaledBytes + NumBytes)
6369 assert(NumVGScaledBytes && "Expected scalable offset");
6370 SmallString<64> OffsetExpr;
6371 // + VG * NumVGScaledBytes
6372 StringRef VGRegScale;
6373 if (IncomingVGOffsetFromDefCFA) {
6374 appendLoadRegExpr(OffsetExpr, *IncomingVGOffsetFromDefCFA);
6375 VGRegScale = "* IncomingVG";
6376 } else {
6377 appendReadRegExpr(OffsetExpr, TRI.getDwarfRegNum(AArch64::VG, true));
6378 VGRegScale = "* VG";
6379 }
6380 appendConstantExpr(OffsetExpr, NumVGScaledBytes, dwarf::DW_OP_mul);
6381 appendOffsetComment(NumVGScaledBytes, Comment, VGRegScale);
6382 OffsetExpr.push_back(dwarf::DW_OP_plus);
6383 if (NumBytes) {
6384 // + NumBytes
6385 appendOffsetComment(NumBytes, Comment);
6386 appendConstantExpr(OffsetExpr, NumBytes, dwarf::DW_OP_plus);
6387 }
6388
6389 // Wrap this into DW_CFA_expression
6390 SmallString<64> CfaExpr;
6391 CfaExpr.push_back(dwarf::DW_CFA_expression);
6392 appendLEB128<LEB128Sign::Unsigned>(CfaExpr, DwarfReg);
6393 appendLEB128<LEB128Sign::Unsigned>(CfaExpr, OffsetExpr.size());
6394 CfaExpr.append(OffsetExpr.str());
6395
6396 return MCCFIInstruction::createEscape(nullptr, CfaExpr.str(), SMLoc(),
6397 Comment.str());
6398}
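// For example, a callee-saved Z register saved 16 bytes plus one vector below
// the CFA (StackOffset -16 fixed, -16 scalable) gives NumBytes == -16 and
// NumVGScaledBytes == -8, and the emitted DW_CFA_expression carries a comment
// of the form "$z8 @ cfa - 8 * VG - 16" (register name illustrative).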
6399
6400// Helper function to emit a frame offset adjustment from a given
6401// pointer (SrcReg), stored into DestReg. This function is explicit
6402// in that it requires the opcode.
6405 const DebugLoc &DL, unsigned DestReg,
6406 unsigned SrcReg, int64_t Offset, unsigned Opc,
6407 const TargetInstrInfo *TII,
6408 MachineInstr::MIFlag Flag, bool NeedsWinCFI,
6409 bool *HasWinCFI, bool EmitCFAOffset,
6410 StackOffset CFAOffset, unsigned FrameReg) {
6411 int Sign = 1;
6412 unsigned MaxEncoding, ShiftSize;
6413 switch (Opc) {
6414 case AArch64::ADDXri:
6415 case AArch64::ADDSXri:
6416 case AArch64::SUBXri:
6417 case AArch64::SUBSXri:
6418 MaxEncoding = 0xfff;
6419 ShiftSize = 12;
6420 break;
6421 case AArch64::ADDVL_XXI:
6422 case AArch64::ADDPL_XXI:
6423 case AArch64::ADDSVL_XXI:
6424 case AArch64::ADDSPL_XXI:
6425 MaxEncoding = 31;
6426 ShiftSize = 0;
6427 if (Offset < 0) {
6428 MaxEncoding = 32;
6429 Sign = -1;
6430 Offset = -Offset;
6431 }
6432 break;
6433 default:
6434 llvm_unreachable("Unsupported opcode");
6435 }
6436
6437 // `Offset` can be in bytes or in "scalable bytes".
6438 int VScale = 1;
6439 if (Opc == AArch64::ADDVL_XXI || Opc == AArch64::ADDSVL_XXI)
6440 VScale = 16;
6441 else if (Opc == AArch64::ADDPL_XXI || Opc == AArch64::ADDSPL_XXI)
6442 VScale = 2;
6443
6444 // FIXME: If the offset won't fit in 24-bits, compute the offset into a
6445 // scratch register. If DestReg is a virtual register, use it as the
6446 // scratch register; otherwise, create a new virtual register (to be
6447 // replaced by the scavenger at the end of PEI). That case can be optimized
6448 // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
6449 // register can be loaded with offset%8 and the add/sub can use an extending
6450 // instruction with LSL#3.
6451 // Currently the function handles any offsets but generates a poor sequence
6452 // of code.
6453 // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
6454
6455 const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
6456 Register TmpReg = DestReg;
6457 if (TmpReg == AArch64::XZR)
6458 TmpReg = MBB.getParent()->getRegInfo().createVirtualRegister(
6459 &AArch64::GPR64RegClass);
6460 do {
6461 uint64_t ThisVal = std::min<uint64_t>(Offset, MaxEncodableValue);
6462 unsigned LocalShiftSize = 0;
6463 if (ThisVal > MaxEncoding) {
6464 ThisVal = ThisVal >> ShiftSize;
6465 LocalShiftSize = ShiftSize;
6466 }
6467 assert((ThisVal >> ShiftSize) <= MaxEncoding &&
6468 "Encoding cannot handle value that big");
6469
6470 Offset -= ThisVal << LocalShiftSize;
6471 if (Offset == 0)
6472 TmpReg = DestReg;
6473 auto MBI = BuildMI(MBB, MBBI, DL, TII->get(Opc), TmpReg)
6474 .addReg(SrcReg)
6475 .addImm(Sign * (int)ThisVal);
6476 if (ShiftSize)
6477 MBI = MBI.addImm(
6479 MBI = MBI.setMIFlag(Flag);
6480
6481 auto Change =
6482 VScale == 1
6483 ? StackOffset::getFixed(ThisVal << LocalShiftSize)
6484 : StackOffset::getScalable(VScale * (ThisVal << LocalShiftSize));
6485 if (Sign == -1 || Opc == AArch64::SUBXri || Opc == AArch64::SUBSXri)
6486 CFAOffset += Change;
6487 else
6488 CFAOffset -= Change;
6489 if (EmitCFAOffset && DestReg == TmpReg) {
6490 MachineFunction &MF = *MBB.getParent();
6491 const TargetSubtargetInfo &STI = MF.getSubtarget();
6492 const TargetRegisterInfo &TRI = *STI.getRegisterInfo();
6493
6494 unsigned CFIIndex = MF.addFrameInst(
6495 createDefCFA(TRI, FrameReg, DestReg, CFAOffset, VScale != 1));
6496 BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION))
6497 .addCFIIndex(CFIIndex)
6498 .setMIFlags(Flag);
6499 }
6500
6501 if (NeedsWinCFI) {
6502 int Imm = (int)(ThisVal << LocalShiftSize);
6503 if (VScale != 1 && DestReg == AArch64::SP) {
6504 if (HasWinCFI)
6505 *HasWinCFI = true;
6506 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AllocZ))
6507 .addImm(ThisVal)
6508 .setMIFlag(Flag);
6509 } else if ((DestReg == AArch64::FP && SrcReg == AArch64::SP) ||
6510 (SrcReg == AArch64::FP && DestReg == AArch64::SP)) {
6511 assert(VScale == 1 && "Expected non-scalable operation");
6512 if (HasWinCFI)
6513 *HasWinCFI = true;
6514 if (Imm == 0)
6515 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_SetFP)).setMIFlag(Flag);
6516 else
6517 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_AddFP))
6518 .addImm(Imm)
6519 .setMIFlag(Flag);
6520 assert(Offset == 0 && "Expected remaining offset to be zero to "
6521 "emit a single SEH directive");
6522 } else if (DestReg == AArch64::SP) {
6523 assert(VScale == 1 && "Expected non-scalable operation");
6524 if (HasWinCFI)
6525 *HasWinCFI = true;
6526 assert(SrcReg == AArch64::SP && "Unexpected SrcReg for SEH_StackAlloc");
6527 BuildMI(MBB, MBBI, DL, TII->get(AArch64::SEH_StackAlloc))
6528 .addImm(Imm)
6529 .setMIFlag(Flag);
6530 }
6531 }
6532
6533 SrcReg = TmpReg;
6534 } while (Offset);
6535}
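// Worked example: a SUBXri adjustment of SP by 4112 bytes does not fit in a
// single 12-bit immediate, so the loop above splits it into two instructions,
// roughly:
//   sub sp, sp, #1, lsl #12   // 4096 bytes
//   sub sp, sp, #16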
6536
6539 unsigned DestReg, unsigned SrcReg,
6541 MachineInstr::MIFlag Flag, bool SetNZCV,
6542 bool NeedsWinCFI, bool *HasWinCFI,
6543 bool EmitCFAOffset, StackOffset CFAOffset,
6544 unsigned FrameReg) {
6545 // If a function is marked as arm_locally_streaming, then the runtime value of
6546   // vscale in the prologue/epilogue is different from the runtime value of vscale
6547 // in the function's body. To avoid having to consider multiple vscales,
6548 // we can use `addsvl` to allocate any scalable stack-slots, which under
6549 // most circumstances will be only locals, not callee-save slots.
6550 const Function &F = MBB.getParent()->getFunction();
6551 bool UseSVL = F.hasFnAttribute("aarch64_pstate_sm_body");
6552
6553 int64_t Bytes, NumPredicateVectors, NumDataVectors;
6554 AArch64InstrInfo::decomposeStackOffsetForFrameOffsets(
6555 Offset, Bytes, NumPredicateVectors, NumDataVectors);
6556
6557 // Insert ADDSXri for scalable offset at the end.
6558 bool NeedsFinalDefNZCV = SetNZCV && (NumPredicateVectors || NumDataVectors);
6559 if (NeedsFinalDefNZCV)
6560 SetNZCV = false;
6561
6562 // First emit non-scalable frame offsets, or a simple 'mov'.
6563 if (Bytes || (!Offset && SrcReg != DestReg)) {
6564 assert((DestReg != AArch64::SP || Bytes % 8 == 0) &&
6565 "SP increment/decrement not 8-byte aligned");
6566 unsigned Opc = SetNZCV ? AArch64::ADDSXri : AArch64::ADDXri;
6567 if (Bytes < 0) {
6568 Bytes = -Bytes;
6569 Opc = SetNZCV ? AArch64::SUBSXri : AArch64::SUBXri;
6570 }
6571 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, Bytes, Opc, TII, Flag,
6572 NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6573 FrameReg);
6574 CFAOffset += (Opc == AArch64::ADDXri || Opc == AArch64::ADDSXri)
6575 ? StackOffset::getFixed(-Bytes)
6576 : StackOffset::getFixed(Bytes);
6577 SrcReg = DestReg;
6578 FrameReg = DestReg;
6579 }
6580
6581 assert(!(NeedsWinCFI && NumPredicateVectors) &&
6582 "WinCFI can't allocate fractions of an SVE data vector");
6583
6584 if (NumDataVectors) {
6585 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumDataVectors,
6586 UseSVL ? AArch64::ADDSVL_XXI : AArch64::ADDVL_XXI, TII,
6587 Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6588 FrameReg);
6589 CFAOffset += StackOffset::getScalable(-NumDataVectors * 16);
6590 SrcReg = DestReg;
6591 }
6592
6593 if (NumPredicateVectors) {
6594 assert(DestReg != AArch64::SP && "Unaligned access to SP");
6595 emitFrameOffsetAdj(MBB, MBBI, DL, DestReg, SrcReg, NumPredicateVectors,
6596 UseSVL ? AArch64::ADDSPL_XXI : AArch64::ADDPL_XXI, TII,
6597 Flag, NeedsWinCFI, HasWinCFI, EmitCFAOffset, CFAOffset,
6598 FrameReg);
6599 }
6600
6601 if (NeedsFinalDefNZCV)
6602 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDSXri), DestReg)
6603 .addReg(DestReg)
6604 .addImm(0)
6605 .addImm(0);
6606}
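// Sketch of a combined adjustment: decrementing SP by 64 fixed bytes plus one
// SVE data vector (StackOffset -64 fixed, -16 scalable) decomposes into
// Bytes == -64 and NumDataVectors == -1, emitting roughly:
//   sub   sp, sp, #64
//   addvl sp, sp, #-1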
6607
6610 MachineBasicBlock::iterator InsertPt, int FrameIndex,
6611 LiveIntervals *LIS, VirtRegMap *VRM) const {
6612 // This is a bit of a hack. Consider this instruction:
6613 //
6614 // %0 = COPY %sp; GPR64all:%0
6615 //
6616 // We explicitly chose GPR64all for the virtual register so such a copy might
6617 // be eliminated by RegisterCoalescer. However, that may not be possible, and
6618 // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
6619 // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
6620 //
6621 // To prevent that, we are going to constrain the %0 register class here.
6622 if (MI.isFullCopy()) {
6623 Register DstReg = MI.getOperand(0).getReg();
6624 Register SrcReg = MI.getOperand(1).getReg();
6625 if (SrcReg == AArch64::SP && DstReg.isVirtual()) {
6626 MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
6627 return nullptr;
6628 }
6629 if (DstReg == AArch64::SP && SrcReg.isVirtual()) {
6630 MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
6631 return nullptr;
6632 }
6633     // Nothing can be folded with a copy from/to NZCV.
6634 if (SrcReg == AArch64::NZCV || DstReg == AArch64::NZCV)
6635 return nullptr;
6636 }
6637
6638 // Handle the case where a copy is being spilled or filled but the source
6639 // and destination register class don't match. For example:
6640 //
6641 // %0 = COPY %xzr; GPR64common:%0
6642 //
6643 // In this case we can still safely fold away the COPY and generate the
6644 // following spill code:
6645 //
6646 // STRXui %xzr, %stack.0
6647 //
6648 // This also eliminates spilled cross register class COPYs (e.g. between x and
6649 // d regs) of the same size. For example:
6650 //
6651 // %0 = COPY %1; GPR64:%0, FPR64:%1
6652 //
6653 // will be filled as
6654 //
6655 // LDRDui %0, fi<#0>
6656 //
6657 // instead of
6658 //
6659 // LDRXui %Temp, fi<#0>
6660 // %0 = FMOV %Temp
6661 //
6662 if (MI.isCopy() && Ops.size() == 1 &&
6663 // Make sure we're only folding the explicit COPY defs/uses.
6664 (Ops[0] == 0 || Ops[0] == 1)) {
6665 bool IsSpill = Ops[0] == 0;
6666 bool IsFill = !IsSpill;
6668 const MachineRegisterInfo &MRI = MF.getRegInfo();
6669 MachineBasicBlock &MBB = *MI.getParent();
6670 const MachineOperand &DstMO = MI.getOperand(0);
6671 const MachineOperand &SrcMO = MI.getOperand(1);
6672 Register DstReg = DstMO.getReg();
6673 Register SrcReg = SrcMO.getReg();
6674 // This is slightly expensive to compute for physical regs since
6675 // getMinimalPhysRegClass is slow.
6676 auto getRegClass = [&](unsigned Reg) {
6677 return Register::isVirtualRegister(Reg) ? MRI.getRegClass(Reg)
6678 : TRI.getMinimalPhysRegClass(Reg);
6679 };
6680
6681 if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
6682 assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
6683 TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
6684 "Mismatched register size in non subreg COPY");
6685 if (IsSpill)
6686 storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
6687 getRegClass(SrcReg), Register());
6688 else
6689 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
6690 getRegClass(DstReg), Register());
6691 return &*--InsertPt;
6692 }
6693
6694 // Handle cases like spilling def of:
6695 //
6696 // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
6697 //
6698 // where the physical register source can be widened and stored to the full
6699 // virtual reg destination stack slot, in this case producing:
6700 //
6701 // STRXui %xzr, %stack.0
6702 //
6703 if (IsSpill && DstMO.isUndef() && SrcReg == AArch64::WZR &&
6704 TRI.getRegSizeInBits(*getRegClass(DstReg)) == 64) {
6705 assert(SrcMO.getSubReg() == 0 &&
6706 "Unexpected subreg on physical register");
6707 storeRegToStackSlot(MBB, InsertPt, AArch64::XZR, SrcMO.isKill(),
6708 FrameIndex, &AArch64::GPR64RegClass, Register());
6709 return &*--InsertPt;
6710 }
6711
6712 // Handle cases like filling use of:
6713 //
6714 // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
6715 //
6716 // where we can load the full virtual reg source stack slot, into the subreg
6717 // destination, in this case producing:
6718 //
6719 // LDRWui %0:sub_32<def,read-undef>, %stack.0
6720 //
6721 if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
6722 const TargetRegisterClass *FillRC = nullptr;
6723 switch (DstMO.getSubReg()) {
6724 default:
6725 break;
6726 case AArch64::sub_32:
6727 if (AArch64::GPR64RegClass.hasSubClassEq(getRegClass(DstReg)))
6728 FillRC = &AArch64::GPR32RegClass;
6729 break;
6730 case AArch64::ssub:
6731 FillRC = &AArch64::FPR32RegClass;
6732 break;
6733 case AArch64::dsub:
6734 FillRC = &AArch64::FPR64RegClass;
6735 break;
6736 }
6737
6738 if (FillRC) {
6739 assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
6740 TRI.getRegSizeInBits(*FillRC) &&
6741 "Mismatched regclass size on folded subreg COPY");
6742 loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC,
6743 Register());
6744 MachineInstr &LoadMI = *--InsertPt;
6745 MachineOperand &LoadDst = LoadMI.getOperand(0);
6746 assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
6747 LoadDst.setSubReg(DstMO.getSubReg());
6748 LoadDst.setIsUndef();
6749 return &LoadMI;
6750 }
6751 }
6752 }
6753
6754 // Cannot fold.
6755 return nullptr;
6756}
6757
6759 StackOffset &SOffset,
6760 bool *OutUseUnscaledOp,
6761 unsigned *OutUnscaledOp,
6762 int64_t *EmittableOffset) {
6763 // Set output values in case of early exit.
6764 if (EmittableOffset)
6765 *EmittableOffset = 0;
6766 if (OutUseUnscaledOp)
6767 *OutUseUnscaledOp = false;
6768 if (OutUnscaledOp)
6769 *OutUnscaledOp = 0;
6770
6771 // Exit early for structured vector spills/fills as they can't take an
6772 // immediate offset.
6773 switch (MI.getOpcode()) {
6774 default:
6775 break;
6776 case AArch64::LD1Rv1d:
6777 case AArch64::LD1Rv2s:
6778 case AArch64::LD1Rv2d:
6779 case AArch64::LD1Rv4h:
6780 case AArch64::LD1Rv4s:
6781 case AArch64::LD1Rv8b:
6782 case AArch64::LD1Rv8h:
6783 case AArch64::LD1Rv16b:
6784 case AArch64::LD1Twov2d:
6785 case AArch64::LD1Threev2d:
6786 case AArch64::LD1Fourv2d:
6787 case AArch64::LD1Twov1d:
6788 case AArch64::LD1Threev1d:
6789 case AArch64::LD1Fourv1d:
6790 case AArch64::ST1Twov2d:
6791 case AArch64::ST1Threev2d:
6792 case AArch64::ST1Fourv2d:
6793 case AArch64::ST1Twov1d:
6794 case AArch64::ST1Threev1d:
6795 case AArch64::ST1Fourv1d:
6796 case AArch64::ST1i8:
6797 case AArch64::ST1i16:
6798 case AArch64::ST1i32:
6799 case AArch64::ST1i64:
6800 case AArch64::IRG:
6801 case AArch64::IRGstack:
6802 case AArch64::STGloop:
6803 case AArch64::STZGloop:
6805 }
6806
6807 // Get the min/max offset and the scale.
6808 TypeSize ScaleValue(0U, false), Width(0U, false);
6809 int64_t MinOff, MaxOff;
6810 if (!AArch64InstrInfo::getMemOpInfo(MI.getOpcode(), ScaleValue, Width, MinOff,
6811 MaxOff))
6812 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6813
6814 // Construct the complete offset.
6815 bool IsMulVL = ScaleValue.isScalable();
6816 unsigned Scale = ScaleValue.getKnownMinValue();
6817 int64_t Offset = IsMulVL ? SOffset.getScalable() : SOffset.getFixed();
6818
6819 const MachineOperand &ImmOpnd =
6820 MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
6821 Offset += ImmOpnd.getImm() * Scale;
6822
6823 // If the offset doesn't match the scale, we rewrite the instruction to
6824   // use the unscaled instruction instead. Likewise, we do so if we have a
6825   // negative offset and there is an unscaled op to use.
6826 std::optional<unsigned> UnscaledOp =
6828 bool useUnscaledOp = UnscaledOp && (Offset % Scale || Offset < 0);
6829 if (useUnscaledOp &&
6830 !AArch64InstrInfo::getMemOpInfo(*UnscaledOp, ScaleValue, Width, MinOff,
6831 MaxOff))
6832 llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
6833
6834 Scale = ScaleValue.getKnownMinValue();
6835 assert(IsMulVL == ScaleValue.isScalable() &&
6836 "Unscaled opcode has different value for scalable");
6837
6838 int64_t Remainder = Offset % Scale;
6839 assert(!(Remainder && useUnscaledOp) &&
6840 "Cannot have remainder when using unscaled op");
6841
6842 assert(MinOff < MaxOff && "Unexpected Min/Max offsets");
6843 int64_t NewOffset = Offset / Scale;
6844 if (MinOff <= NewOffset && NewOffset <= MaxOff)
6845 Offset = Remainder;
6846 else {
6847 NewOffset = NewOffset < 0 ? MinOff : MaxOff;
6848 Offset = Offset - (NewOffset * Scale);
6849 }
6850
6851 if (EmittableOffset)
6852 *EmittableOffset = NewOffset;
6853 if (OutUseUnscaledOp)
6854 *OutUseUnscaledOp = useUnscaledOp;
6855 if (OutUnscaledOp && UnscaledOp)
6856 *OutUnscaledOp = *UnscaledOp;
6857
6858 if (IsMulVL)
6859 SOffset = StackOffset::get(SOffset.getFixed(), Offset);
6860 else
6861 SOffset = StackOffset::get(Offset, SOffset.getScalable());
6863 (SOffset ? 0 : AArch64FrameOffsetIsLegal);
6864}
6865
6867 unsigned FrameReg, StackOffset &Offset,
6868 const AArch64InstrInfo *TII) {
6869 unsigned Opcode = MI.getOpcode();
6870 unsigned ImmIdx = FrameRegIdx + 1;
6871
6872 if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
6873 Offset += StackOffset::getFixed(MI.getOperand(ImmIdx).getImm());
6874 emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
6875 MI.getOperand(0).getReg(), FrameReg, Offset, TII,
6876 MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
6877 MI.eraseFromParent();
6878 Offset = StackOffset();
6879 return true;
6880 }
6881
6882 int64_t NewOffset;
6883 unsigned UnscaledOp;
6884 bool UseUnscaledOp;
6885 int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
6886 &UnscaledOp, &NewOffset);
6889 // Replace the FrameIndex with FrameReg.
6890 MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
6891 if (UseUnscaledOp)
6892 MI.setDesc(TII->get(UnscaledOp));
6893
6894 MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
6895 return !Offset;
6896 }
6897
6898 return false;
6899}
6900
6906
6908 return MCInstBuilder(AArch64::HINT).addImm(0);
6909}
6910
6911// AArch64 supports MachineCombiner.
6912bool AArch64InstrInfo::useMachineCombiner() const { return true; }
6913
6914 // True when Opc sets the NZCV flags
6915static bool isCombineInstrSettingFlag(unsigned Opc) {
6916 switch (Opc) {
6917 case AArch64::ADDSWrr:
6918 case AArch64::ADDSWri:
6919 case AArch64::ADDSXrr:
6920 case AArch64::ADDSXri:
6921 case AArch64::SUBSWrr:
6922 case AArch64::SUBSXrr:
6923 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6924 case AArch64::SUBSWri:
6925 case AArch64::SUBSXri:
6926 return true;
6927 default:
6928 break;
6929 }
6930 return false;
6931}
6932
6933// 32b Opcodes that can be combined with a MUL
6934static bool isCombineInstrCandidate32(unsigned Opc) {
6935 switch (Opc) {
6936 case AArch64::ADDWrr:
6937 case AArch64::ADDWri:
6938 case AArch64::SUBWrr:
6939 case AArch64::ADDSWrr:
6940 case AArch64::ADDSWri:
6941 case AArch64::SUBSWrr:
6942 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6943 case AArch64::SUBWri:
6944 case AArch64::SUBSWri:
6945 return true;
6946 default:
6947 break;
6948 }
6949 return false;
6950}
6951
6952// 64b Opcodes that can be combined with a MUL
6953static bool isCombineInstrCandidate64(unsigned Opc) {
6954 switch (Opc) {
6955 case AArch64::ADDXrr:
6956 case AArch64::ADDXri:
6957 case AArch64::SUBXrr:
6958 case AArch64::ADDSXrr:
6959 case AArch64::ADDSXri:
6960 case AArch64::SUBSXrr:
6961 // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
6962 case AArch64::SUBXri:
6963 case AArch64::SUBSXri:
6964 case AArch64::ADDv8i8:
6965 case AArch64::ADDv16i8:
6966 case AArch64::ADDv4i16:
6967 case AArch64::ADDv8i16:
6968 case AArch64::ADDv2i32:
6969 case AArch64::ADDv4i32:
6970 case AArch64::SUBv8i8:
6971 case AArch64::SUBv16i8:
6972 case AArch64::SUBv4i16:
6973 case AArch64::SUBv8i16:
6974 case AArch64::SUBv2i32:
6975 case AArch64::SUBv4i32:
6976 return true;
6977 default:
6978 break;
6979 }
6980 return false;
6981}
6982
6983// FP Opcodes that can be combined with a FMUL.
6984static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
6985 switch (Inst.getOpcode()) {
6986 default:
6987 break;
6988 case AArch64::FADDHrr:
6989 case AArch64::FADDSrr:
6990 case AArch64::FADDDrr:
6991 case AArch64::FADDv4f16:
6992 case AArch64::FADDv8f16:
6993 case AArch64::FADDv2f32:
6994 case AArch64::FADDv2f64:
6995 case AArch64::FADDv4f32:
6996 case AArch64::FSUBHrr:
6997 case AArch64::FSUBSrr:
6998 case AArch64::FSUBDrr:
6999 case AArch64::FSUBv4f16:
7000 case AArch64::FSUBv8f16:
7001 case AArch64::FSUBv2f32:
7002 case AArch64::FSUBv2f64:
7003 case AArch64::FSUBv4f32:
7005 // We can fuse FADD/FSUB with FMUL, if fusion is either allowed globally by
7006 // the target options or if FADD/FSUB has the contract fast-math flag.
7007 return Options.AllowFPOpFusion == FPOpFusion::Fast ||
7009 }
7010 return false;
7011}
7012
7013// Opcodes that can be combined with a MUL
7017
7018//
7019// Utility routine that checks if \param MO is defined by an
7020// \param CombineOpc instruction in the basic block \param MBB
7022 unsigned CombineOpc, unsigned ZeroReg = 0,
7023 bool CheckZeroReg = false) {
7024 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7025 MachineInstr *MI = nullptr;
7026
7027 if (MO.isReg() && MO.getReg().isVirtual())
7028 MI = MRI.getUniqueVRegDef(MO.getReg());
7029 // And it needs to be in the trace (otherwise, it won't have a depth).
7030 if (!MI || MI->getParent() != &MBB || MI->getOpcode() != CombineOpc)
7031 return false;
7032   // Must only be used by the user we combine with.
7033 if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
7034 return false;
7035
7036 if (CheckZeroReg) {
7037 assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
7038 MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
7039          MI->getOperand(3).isReg() && "MAdd/MSub must have at least 4 regs");
7040 // The third input reg must be zero.
7041 if (MI->getOperand(3).getReg() != ZeroReg)
7042 return false;
7043 }
7044
7045 if (isCombineInstrSettingFlag(CombineOpc) &&
7046 MI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) == -1)
7047 return false;
7048
7049 return true;
7050}
7051
7052//
7053// Is \param MO defined by an integer multiply and can be combined?
7055 unsigned MulOpc, unsigned ZeroReg) {
7056 return canCombine(MBB, MO, MulOpc, ZeroReg, true);
7057}
7058
7059//
7060// Is \param MO defined by a floating-point multiply and can be combined?
7062 unsigned MulOpc) {
7063 return canCombine(MBB, MO, MulOpc);
7064}
7065
7066// TODO: There are many more machine instruction opcodes to match:
7067// 1. Other data types (integer, vectors)
7068// 2. Other math / logic operations (xor, or)
7069// 3. Other forms of the same operation (intrinsics and other variants)
7070bool AArch64InstrInfo::isAssociativeAndCommutative(const MachineInstr &Inst,
7071 bool Invert) const {
7072 if (Invert)
7073 return false;
7074 switch (Inst.getOpcode()) {
7075 // == Floating-point types ==
7076 // -- Floating-point instructions --
7077 case AArch64::FADDHrr:
7078 case AArch64::FADDSrr:
7079 case AArch64::FADDDrr:
7080 case AArch64::FMULHrr:
7081 case AArch64::FMULSrr:
7082 case AArch64::FMULDrr:
7083 case AArch64::FMULX16:
7084 case AArch64::FMULX32:
7085 case AArch64::FMULX64:
7086 // -- Advanced SIMD instructions --
7087 case AArch64::FADDv4f16:
7088 case AArch64::FADDv8f16:
7089 case AArch64::FADDv2f32:
7090 case AArch64::FADDv4f32:
7091 case AArch64::FADDv2f64:
7092 case AArch64::FMULv4f16:
7093 case AArch64::FMULv8f16:
7094 case AArch64::FMULv2f32:
7095 case AArch64::FMULv4f32:
7096 case AArch64::FMULv2f64:
7097 case AArch64::FMULXv4f16:
7098 case AArch64::FMULXv8f16:
7099 case AArch64::FMULXv2f32:
7100 case AArch64::FMULXv4f32:
7101 case AArch64::FMULXv2f64:
7102 // -- SVE instructions --
7103 // Opcodes FMULX_ZZZ_? don't exist because there is no unpredicated FMULX
7104 // in the SVE instruction set (though there are predicated ones).
7105 case AArch64::FADD_ZZZ_H:
7106 case AArch64::FADD_ZZZ_S:
7107 case AArch64::FADD_ZZZ_D:
7108 case AArch64::FMUL_ZZZ_H:
7109 case AArch64::FMUL_ZZZ_S:
7110 case AArch64::FMUL_ZZZ_D:
7111 return Inst.getFlag(MachineInstr::MIFlag::FmReassoc) &&
7112 Inst.getFlag(MachineInstr::MIFlag::FmNsz);
7113
7114 // == Integer types ==
7115 // -- Base instructions --
7116 // Opcodes MULWrr and MULXrr don't exist because
7117 // `MUL <Wd>, <Wn>, <Wm>` and `MUL <Xd>, <Xn>, <Xm>` are aliases of
7118 // `MADD <Wd>, <Wn>, <Wm>, WZR` and `MADD <Xd>, <Xn>, <Xm>, XZR` respectively.
7119 // The machine-combiner does not support three-source-operand machine
7120 // instructions, so we cannot reassociate MULs.
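 // For example, `mul w0, w1, w2` assembles to `madd w0, w1, w2, wzr`, so there
 // is no genuine two-source integer multiply for the combiner to reassociate.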
7121 case AArch64::ADDWrr:
7122 case AArch64::ADDXrr:
7123 case AArch64::ANDWrr:
7124 case AArch64::ANDXrr:
7125 case AArch64::ORRWrr:
7126 case AArch64::ORRXrr:
7127 case AArch64::EORWrr:
7128 case AArch64::EORXrr:
7129 case AArch64::EONWrr:
7130 case AArch64::EONXrr:
7131 // -- Advanced SIMD instructions --
7132 // Opcodes MULv1i64 and MULv2i64 don't exist because there is no 64-bit MUL
7133 // in the Advanced SIMD instruction set.
7134 case AArch64::ADDv8i8:
7135 case AArch64::ADDv16i8:
7136 case AArch64::ADDv4i16:
7137 case AArch64::ADDv8i16:
7138 case AArch64::ADDv2i32:
7139 case AArch64::ADDv4i32:
7140 case AArch64::ADDv1i64:
7141 case AArch64::ADDv2i64:
7142 case AArch64::MULv8i8:
7143 case AArch64::MULv16i8:
7144 case AArch64::MULv4i16:
7145 case AArch64::MULv8i16:
7146 case AArch64::MULv2i32:
7147 case AArch64::MULv4i32:
7148 case AArch64::ANDv8i8:
7149 case AArch64::ANDv16i8:
7150 case AArch64::ORRv8i8:
7151 case AArch64::ORRv16i8:
7152 case AArch64::EORv8i8:
7153 case AArch64::EORv16i8:
7154 // -- SVE instructions --
7155 case AArch64::ADD_ZZZ_B:
7156 case AArch64::ADD_ZZZ_H:
7157 case AArch64::ADD_ZZZ_S:
7158 case AArch64::ADD_ZZZ_D:
7159 case AArch64::MUL_ZZZ_B:
7160 case AArch64::MUL_ZZZ_H:
7161 case AArch64::MUL_ZZZ_S:
7162 case AArch64::MUL_ZZZ_D:
7163 case AArch64::AND_ZZZ:
7164 case AArch64::ORR_ZZZ:
7165 case AArch64::EOR_ZZZ:
7166 return true;
7167
7168 default:
7169 return false;
7170 }
7171}
7172
7173/// Find instructions that can be turned into madd.
7174static bool getMaddPatterns(MachineInstr &Root,
7175 SmallVectorImpl<unsigned> &Patterns) {
7176 unsigned Opc = Root.getOpcode();
7177 MachineBasicBlock &MBB = *Root.getParent();
7178 bool Found = false;
7179
7180 if (!isCombineInstrCandidate(Opc))
7181 return false;
7182 if (isCombineInstrSettingFlag(Opc)) {
7183 int Cmp_NZCV =
7184 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true);
7185 // When NZCV is live, bail out.
7186 if (Cmp_NZCV == -1)
7187 return false;
7188 unsigned NewOpc = convertToNonFlagSettingOpc(Root);
7189 // When the opcode can't change, bail out.
7190 // CHECKME: do we miss any cases for opcode conversion?
7191 if (NewOpc == Opc)
7192 return false;
7193 Opc = NewOpc;
7194 }
7195
7196 auto setFound = [&](int Opcode, int Operand, unsigned ZeroReg,
7197 unsigned Pattern) {
7198 if (canCombineWithMUL(MBB, Root.getOperand(Operand), Opcode, ZeroReg)) {
7199 Patterns.push_back(Pattern);
7200 Found = true;
7201 }
7202 };
7203
7204 auto setVFound = [&](int Opcode, int Operand, unsigned Pattern) {
7205 if (canCombine(MBB, Root.getOperand(Operand), Opcode)) {
7206 Patterns.push_back(Pattern);
7207 Found = true;
7208 }
7209 };
7210
7211 typedef AArch64MachineCombinerPattern MCP;
7212
7213 switch (Opc) {
7214 default:
7215 break;
7216 case AArch64::ADDWrr:
7217 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7218 "ADDWrr does not have register operands");
7219 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDW_OP1);
7220 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULADDW_OP2);
7221 break;
7222 case AArch64::ADDXrr:
7223 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDX_OP1);
7224 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULADDX_OP2);
7225 break;
7226 case AArch64::SUBWrr:
7227 setFound(AArch64::MADDWrrr, 2, AArch64::WZR, MCP::MULSUBW_OP2);
7228 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBW_OP1);
7229 break;
7230 case AArch64::SUBXrr:
7231 setFound(AArch64::MADDXrrr, 2, AArch64::XZR, MCP::MULSUBX_OP2);
7232 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBX_OP1);
7233 break;
7234 case AArch64::ADDWri:
7235 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULADDWI_OP1);
7236 break;
7237 case AArch64::ADDXri:
7238 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULADDXI_OP1);
7239 break;
7240 case AArch64::SUBWri:
7241 setFound(AArch64::MADDWrrr, 1, AArch64::WZR, MCP::MULSUBWI_OP1);
7242 break;
7243 case AArch64::SUBXri:
7244 setFound(AArch64::MADDXrrr, 1, AArch64::XZR, MCP::MULSUBXI_OP1);
7245 break;
7246 case AArch64::ADDv8i8:
7247 setVFound(AArch64::MULv8i8, 1, MCP::MULADDv8i8_OP1);
7248 setVFound(AArch64::MULv8i8, 2, MCP::MULADDv8i8_OP2);
7249 break;
7250 case AArch64::ADDv16i8:
7251 setVFound(AArch64::MULv16i8, 1, MCP::MULADDv16i8_OP1);
7252 setVFound(AArch64::MULv16i8, 2, MCP::MULADDv16i8_OP2);
7253 break;
7254 case AArch64::ADDv4i16:
7255 setVFound(AArch64::MULv4i16, 1, MCP::MULADDv4i16_OP1);
7256 setVFound(AArch64::MULv4i16, 2, MCP::MULADDv4i16_OP2);
7257 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULADDv4i16_indexed_OP1);
7258 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULADDv4i16_indexed_OP2);
7259 break;
7260 case AArch64::ADDv8i16:
7261 setVFound(AArch64::MULv8i16, 1, MCP::MULADDv8i16_OP1);
7262 setVFound(AArch64::MULv8i16, 2, MCP::MULADDv8i16_OP2);
7263 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULADDv8i16_indexed_OP1);
7264 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULADDv8i16_indexed_OP2);
7265 break;
7266 case AArch64::ADDv2i32:
7267 setVFound(AArch64::MULv2i32, 1, MCP::MULADDv2i32_OP1);
7268 setVFound(AArch64::MULv2i32, 2, MCP::MULADDv2i32_OP2);
7269 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULADDv2i32_indexed_OP1);
7270 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULADDv2i32_indexed_OP2);
7271 break;
7272 case AArch64::ADDv4i32:
7273 setVFound(AArch64::MULv4i32, 1, MCP::MULADDv4i32_OP1);
7274 setVFound(AArch64::MULv4i32, 2, MCP::MULADDv4i32_OP2);
7275 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULADDv4i32_indexed_OP1);
7276 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULADDv4i32_indexed_OP2);
7277 break;
7278 case AArch64::SUBv8i8:
7279 setVFound(AArch64::MULv8i8, 1, MCP::MULSUBv8i8_OP1);
7280 setVFound(AArch64::MULv8i8, 2, MCP::MULSUBv8i8_OP2);
7281 break;
7282 case AArch64::SUBv16i8:
7283 setVFound(AArch64::MULv16i8, 1, MCP::MULSUBv16i8_OP1);
7284 setVFound(AArch64::MULv16i8, 2, MCP::MULSUBv16i8_OP2);
7285 break;
7286 case AArch64::SUBv4i16:
7287 setVFound(AArch64::MULv4i16, 1, MCP::MULSUBv4i16_OP1);
7288 setVFound(AArch64::MULv4i16, 2, MCP::MULSUBv4i16_OP2);
7289 setVFound(AArch64::MULv4i16_indexed, 1, MCP::MULSUBv4i16_indexed_OP1);
7290 setVFound(AArch64::MULv4i16_indexed, 2, MCP::MULSUBv4i16_indexed_OP2);
7291 break;
7292 case AArch64::SUBv8i16:
7293 setVFound(AArch64::MULv8i16, 1, MCP::MULSUBv8i16_OP1);
7294 setVFound(AArch64::MULv8i16, 2, MCP::MULSUBv8i16_OP2);
7295 setVFound(AArch64::MULv8i16_indexed, 1, MCP::MULSUBv8i16_indexed_OP1);
7296 setVFound(AArch64::MULv8i16_indexed, 2, MCP::MULSUBv8i16_indexed_OP2);
7297 break;
7298 case AArch64::SUBv2i32:
7299 setVFound(AArch64::MULv2i32, 1, MCP::MULSUBv2i32_OP1);
7300 setVFound(AArch64::MULv2i32, 2, MCP::MULSUBv2i32_OP2);
7301 setVFound(AArch64::MULv2i32_indexed, 1, MCP::MULSUBv2i32_indexed_OP1);
7302 setVFound(AArch64::MULv2i32_indexed, 2, MCP::MULSUBv2i32_indexed_OP2);
7303 break;
7304 case AArch64::SUBv4i32:
7305 setVFound(AArch64::MULv4i32, 1, MCP::MULSUBv4i32_OP1);
7306 setVFound(AArch64::MULv4i32, 2, MCP::MULSUBv4i32_OP2);
7307 setVFound(AArch64::MULv4i32_indexed, 1, MCP::MULSUBv4i32_indexed_OP1);
7308 setVFound(AArch64::MULv4i32_indexed, 2, MCP::MULSUBv4i32_indexed_OP2);
7309 break;
7310 }
7311 return Found;
7312}
7313
7314bool AArch64InstrInfo::isAccumulationOpcode(unsigned Opcode) const {
7315 switch (Opcode) {
7316 default:
7317 break;
7318 case AArch64::UABALB_ZZZ_D:
7319 case AArch64::UABALB_ZZZ_H:
7320 case AArch64::UABALB_ZZZ_S:
7321 case AArch64::UABALT_ZZZ_D:
7322 case AArch64::UABALT_ZZZ_H:
7323 case AArch64::UABALT_ZZZ_S:
7324 case AArch64::SABALB_ZZZ_D:
7325 case AArch64::SABALB_ZZZ_S:
7326 case AArch64::SABALB_ZZZ_H:
7327 case AArch64::SABALT_ZZZ_D:
7328 case AArch64::SABALT_ZZZ_S:
7329 case AArch64::SABALT_ZZZ_H:
7330 case AArch64::UABALv16i8_v8i16:
7331 case AArch64::UABALv2i32_v2i64:
7332 case AArch64::UABALv4i16_v4i32:
7333 case AArch64::UABALv4i32_v2i64:
7334 case AArch64::UABALv8i16_v4i32:
7335 case AArch64::UABALv8i8_v8i16:
7336 case AArch64::UABAv16i8:
7337 case AArch64::UABAv2i32:
7338 case AArch64::UABAv4i16:
7339 case AArch64::UABAv4i32:
7340 case AArch64::UABAv8i16:
7341 case AArch64::UABAv8i8:
7342 case AArch64::SABALv16i8_v8i16:
7343 case AArch64::SABALv2i32_v2i64:
7344 case AArch64::SABALv4i16_v4i32:
7345 case AArch64::SABALv4i32_v2i64:
7346 case AArch64::SABALv8i16_v4i32:
7347 case AArch64::SABALv8i8_v8i16:
7348 case AArch64::SABAv16i8:
7349 case AArch64::SABAv2i32:
7350 case AArch64::SABAv4i16:
7351 case AArch64::SABAv4i32:
7352 case AArch64::SABAv8i16:
7353 case AArch64::SABAv8i8:
7354 return true;
7355 }
7356
7357 return false;
7358}
7359
7360unsigned AArch64InstrInfo::getAccumulationStartOpcode(
7361 unsigned AccumulationOpcode) const {
7362 switch (AccumulationOpcode) {
7363 default:
7364 llvm_unreachable("Unsupported accumulation Opcode!");
7365 case AArch64::UABALB_ZZZ_D:
7366 return AArch64::UABDLB_ZZZ_D;
7367 case AArch64::UABALB_ZZZ_H:
7368 return AArch64::UABDLB_ZZZ_H;
7369 case AArch64::UABALB_ZZZ_S:
7370 return AArch64::UABDLB_ZZZ_S;
7371 case AArch64::UABALT_ZZZ_D:
7372 return AArch64::UABDLT_ZZZ_D;
7373 case AArch64::UABALT_ZZZ_H:
7374 return AArch64::UABDLT_ZZZ_H;
7375 case AArch64::UABALT_ZZZ_S:
7376 return AArch64::UABDLT_ZZZ_S;
7377 case AArch64::UABALv16i8_v8i16:
7378 return AArch64::UABDLv16i8_v8i16;
7379 case AArch64::UABALv2i32_v2i64:
7380 return AArch64::UABDLv2i32_v2i64;
7381 case AArch64::UABALv4i16_v4i32:
7382 return AArch64::UABDLv4i16_v4i32;
7383 case AArch64::UABALv4i32_v2i64:
7384 return AArch64::UABDLv4i32_v2i64;
7385 case AArch64::UABALv8i16_v4i32:
7386 return AArch64::UABDLv8i16_v4i32;
7387 case AArch64::UABALv8i8_v8i16:
7388 return AArch64::UABDLv8i8_v8i16;
7389 case AArch64::UABAv16i8:
7390 return AArch64::UABDv16i8;
7391 case AArch64::UABAv2i32:
7392 return AArch64::UABDv2i32;
7393 case AArch64::UABAv4i16:
7394 return AArch64::UABDv4i16;
7395 case AArch64::UABAv4i32:
7396 return AArch64::UABDv4i32;
7397 case AArch64::UABAv8i16:
7398 return AArch64::UABDv8i16;
7399 case AArch64::UABAv8i8:
7400 return AArch64::UABDv8i8;
7401 case AArch64::SABALB_ZZZ_D:
7402 return AArch64::SABDLB_ZZZ_D;
7403 case AArch64::SABALB_ZZZ_S:
7404 return AArch64::SABDLB_ZZZ_S;
7405 case AArch64::SABALB_ZZZ_H:
7406 return AArch64::SABDLB_ZZZ_H;
7407 case AArch64::SABALT_ZZZ_D:
7408 return AArch64::SABDLT_ZZZ_D;
7409 case AArch64::SABALT_ZZZ_S:
7410 return AArch64::SABDLT_ZZZ_S;
7411 case AArch64::SABALT_ZZZ_H:
7412 return AArch64::SABDLT_ZZZ_H;
7413 case AArch64::SABALv16i8_v8i16:
7414 return AArch64::SABDLv16i8_v8i16;
7415 case AArch64::SABALv2i32_v2i64:
7416 return AArch64::SABDLv2i32_v2i64;
7417 case AArch64::SABALv4i16_v4i32:
7418 return AArch64::SABDLv4i16_v4i32;
7419 case AArch64::SABALv4i32_v2i64:
7420 return AArch64::SABDLv4i32_v2i64;
7421 case AArch64::SABALv8i16_v4i32:
7422 return AArch64::SABDLv8i16_v4i32;
7423 case AArch64::SABALv8i8_v8i16:
7424 return AArch64::SABDLv8i8_v8i16;
7425 case AArch64::SABAv16i8:
7426 return AArch64::SABDv16i8;
7427 case AArch64::SABAv2i32:
7428 return AArch64::SABDv2i32;
7429 case AArch64::SABAv4i16:
7430 return AArch64::SABDv4i16;
7431 case AArch64::SABAv4i32:
7432 return AArch64::SABDv4i32;
7433 case AArch64::SABAv8i16:
7434 return AArch64::SABDv8i16;
7435 case AArch64::SABAv8i8:
7436 return AArch64::SABDv8i8;
7437 }
7438}
7439
7440/// Floating-Point Support
7441
7442/// Find instructions that can be turned into an fmadd.
7443static bool getFMAPatterns(MachineInstr &Root,
7444 SmallVectorImpl<unsigned> &Patterns) {
7445
7446 if (!isCombineInstrCandidateFP(Root))
7447 return false;
7448
7449 MachineBasicBlock &MBB = *Root.getParent();
7450 bool Found = false;
7451
7452 auto Match = [&](int Opcode, int Operand, unsigned Pattern) -> bool {
7453 if (canCombineWithFMUL(MBB, Root.getOperand(Operand), Opcode)) {
7454 Patterns.push_back(Pattern);
7455 return true;
7456 }
7457 return false;
7458 };
7459
7460 typedef AArch64MachineCombinerPattern MCP;
7461
7462 switch (Root.getOpcode()) {
7463 default:
7464 assert(false && "Unsupported FP instruction in combiner\n");
7465 break;
7466 case AArch64::FADDHrr:
7467 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7468 "FADDHrr does not have register operands");
7469
7470 Found = Match(AArch64::FMULHrr, 1, MCP::FMULADDH_OP1);
7471 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULADDH_OP2);
7472 break;
7473 case AArch64::FADDSrr:
7474 assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
7475 "FADDSrr does not have register operands");
7476
7477 Found |= Match(AArch64::FMULSrr, 1, MCP::FMULADDS_OP1) ||
7478 Match(AArch64::FMULv1i32_indexed, 1, MCP::FMLAv1i32_indexed_OP1);
7479
7480 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULADDS_OP2) ||
7481 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLAv1i32_indexed_OP2);
7482 break;
7483 case AArch64::FADDDrr:
7484 Found |= Match(AArch64::FMULDrr, 1, MCP::FMULADDD_OP1) ||
7485 Match(AArch64::FMULv1i64_indexed, 1, MCP::FMLAv1i64_indexed_OP1);
7486
7487 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULADDD_OP2) ||
7488 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLAv1i64_indexed_OP2);
7489 break;
7490 case AArch64::FADDv4f16:
7491 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLAv4i16_indexed_OP1) ||
7492 Match(AArch64::FMULv4f16, 1, MCP::FMLAv4f16_OP1);
7493
7494 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLAv4i16_indexed_OP2) ||
7495 Match(AArch64::FMULv4f16, 2, MCP::FMLAv4f16_OP2);
7496 break;
7497 case AArch64::FADDv8f16:
7498 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLAv8i16_indexed_OP1) ||
7499 Match(AArch64::FMULv8f16, 1, MCP::FMLAv8f16_OP1);
7500
7501 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLAv8i16_indexed_OP2) ||
7502 Match(AArch64::FMULv8f16, 2, MCP::FMLAv8f16_OP2);
7503 break;
7504 case AArch64::FADDv2f32:
7505 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLAv2i32_indexed_OP1) ||
7506 Match(AArch64::FMULv2f32, 1, MCP::FMLAv2f32_OP1);
7507
7508 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLAv2i32_indexed_OP2) ||
7509 Match(AArch64::FMULv2f32, 2, MCP::FMLAv2f32_OP2);
7510 break;
7511 case AArch64::FADDv2f64:
7512 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLAv2i64_indexed_OP1) ||
7513 Match(AArch64::FMULv2f64, 1, MCP::FMLAv2f64_OP1);
7514
7515 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLAv2i64_indexed_OP2) ||
7516 Match(AArch64::FMULv2f64, 2, MCP::FMLAv2f64_OP2);
7517 break;
7518 case AArch64::FADDv4f32:
7519 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLAv4i32_indexed_OP1) ||
7520 Match(AArch64::FMULv4f32, 1, MCP::FMLAv4f32_OP1);
7521
7522 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLAv4i32_indexed_OP2) ||
7523 Match(AArch64::FMULv4f32, 2, MCP::FMLAv4f32_OP2);
7524 break;
7525 case AArch64::FSUBHrr:
7526 Found = Match(AArch64::FMULHrr, 1, MCP::FMULSUBH_OP1);
7527 Found |= Match(AArch64::FMULHrr, 2, MCP::FMULSUBH_OP2);
7528 Found |= Match(AArch64::FNMULHrr, 1, MCP::FNMULSUBH_OP1);
7529 break;
7530 case AArch64::FSUBSrr:
7531 Found = Match(AArch64::FMULSrr, 1, MCP::FMULSUBS_OP1);
7532
7533 Found |= Match(AArch64::FMULSrr, 2, MCP::FMULSUBS_OP2) ||
7534 Match(AArch64::FMULv1i32_indexed, 2, MCP::FMLSv1i32_indexed_OP2);
7535
7536 Found |= Match(AArch64::FNMULSrr, 1, MCP::FNMULSUBS_OP1);
7537 break;
7538 case AArch64::FSUBDrr:
7539 Found = Match(AArch64::FMULDrr, 1, MCP::FMULSUBD_OP1);
7540
7541 Found |= Match(AArch64::FMULDrr, 2, MCP::FMULSUBD_OP2) ||
7542 Match(AArch64::FMULv1i64_indexed, 2, MCP::FMLSv1i64_indexed_OP2);
7543
7544 Found |= Match(AArch64::FNMULDrr, 1, MCP::FNMULSUBD_OP1);
7545 break;
7546 case AArch64::FSUBv4f16:
7547 Found |= Match(AArch64::FMULv4i16_indexed, 2, MCP::FMLSv4i16_indexed_OP2) ||
7548 Match(AArch64::FMULv4f16, 2, MCP::FMLSv4f16_OP2);
7549
7550 Found |= Match(AArch64::FMULv4i16_indexed, 1, MCP::FMLSv4i16_indexed_OP1) ||
7551 Match(AArch64::FMULv4f16, 1, MCP::FMLSv4f16_OP1);
7552 break;
7553 case AArch64::FSUBv8f16:
7554 Found |= Match(AArch64::FMULv8i16_indexed, 2, MCP::FMLSv8i16_indexed_OP2) ||
7555 Match(AArch64::FMULv8f16, 2, MCP::FMLSv8f16_OP2);
7556
7557 Found |= Match(AArch64::FMULv8i16_indexed, 1, MCP::FMLSv8i16_indexed_OP1) ||
7558 Match(AArch64::FMULv8f16, 1, MCP::FMLSv8f16_OP1);
7559 break;
7560 case AArch64::FSUBv2f32:
7561 Found |= Match(AArch64::FMULv2i32_indexed, 2, MCP::FMLSv2i32_indexed_OP2) ||
7562 Match(AArch64::FMULv2f32, 2, MCP::FMLSv2f32_OP2);
7563
7564 Found |= Match(AArch64::FMULv2i32_indexed, 1, MCP::FMLSv2i32_indexed_OP1) ||
7565 Match(AArch64::FMULv2f32, 1, MCP::FMLSv2f32_OP1);
7566 break;
7567 case AArch64::FSUBv2f64:
7568 Found |= Match(AArch64::FMULv2i64_indexed, 2, MCP::FMLSv2i64_indexed_OP2) ||
7569 Match(AArch64::FMULv2f64, 2, MCP::FMLSv2f64_OP2);
7570
7571 Found |= Match(AArch64::FMULv2i64_indexed, 1, MCP::FMLSv2i64_indexed_OP1) ||
7572 Match(AArch64::FMULv2f64, 1, MCP::FMLSv2f64_OP1);
7573 break;
7574 case AArch64::FSUBv4f32:
7575 Found |= Match(AArch64::FMULv4i32_indexed, 2, MCP::FMLSv4i32_indexed_OP2) ||
7576 Match(AArch64::FMULv4f32, 2, MCP::FMLSv4f32_OP2);
7577
7578 Found |= Match(AArch64::FMULv4i32_indexed, 1, MCP::FMLSv4i32_indexed_OP1) ||
7579 Match(AArch64::FMULv4f32, 1, MCP::FMLSv4f32_OP1);
7580 break;
7581 }
7582 return Found;
7583}
7584
7585static bool getFMULPatterns(MachineInstr &Root,
7586 SmallVectorImpl<unsigned> &Patterns) {
7587 MachineBasicBlock &MBB = *Root.getParent();
7588 bool Found = false;
7589
7590 auto Match = [&](unsigned Opcode, int Operand, unsigned Pattern) -> bool {
7591 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7592 MachineOperand &MO = Root.getOperand(Operand);
7593 MachineInstr *MI = nullptr;
7594 if (MO.isReg() && MO.getReg().isVirtual())
7595 MI = MRI.getUniqueVRegDef(MO.getReg());
7596 // Ignore No-op COPYs in FMUL(COPY(DUP(..)))
7597 if (MI && MI->getOpcode() == TargetOpcode::COPY &&
7598 MI->getOperand(1).getReg().isVirtual())
7599 MI = MRI.getUniqueVRegDef(MI->getOperand(1).getReg());
7600 if (MI && MI->getOpcode() == Opcode) {
7601 Patterns.push_back(Pattern);
7602 return true;
7603 }
7604 return false;
7605 };
7606
7607 typedef AArch64MachineCombinerPattern MCP;
7608
7609 switch (Root.getOpcode()) {
7610 default:
7611 return false;
7612 case AArch64::FMULv2f32:
7613 Found = Match(AArch64::DUPv2i32lane, 1, MCP::FMULv2i32_indexed_OP1);
7614 Found |= Match(AArch64::DUPv2i32lane, 2, MCP::FMULv2i32_indexed_OP2);
7615 break;
7616 case AArch64::FMULv2f64:
7617 Found = Match(AArch64::DUPv2i64lane, 1, MCP::FMULv2i64_indexed_OP1);
7618 Found |= Match(AArch64::DUPv2i64lane, 2, MCP::FMULv2i64_indexed_OP2);
7619 break;
7620 case AArch64::FMULv4f16:
7621 Found = Match(AArch64::DUPv4i16lane, 1, MCP::FMULv4i16_indexed_OP1);
7622 Found |= Match(AArch64::DUPv4i16lane, 2, MCP::FMULv4i16_indexed_OP2);
7623 break;
7624 case AArch64::FMULv4f32:
7625 Found = Match(AArch64::DUPv4i32lane, 1, MCP::FMULv4i32_indexed_OP1);
7626 Found |= Match(AArch64::DUPv4i32lane, 2, MCP::FMULv4i32_indexed_OP2);
7627 break;
7628 case AArch64::FMULv8f16:
7629 Found = Match(AArch64::DUPv8i16lane, 1, MCP::FMULv8i16_indexed_OP1);
7630 Found |= Match(AArch64::DUPv8i16lane, 2, MCP::FMULv8i16_indexed_OP2);
7631 break;
7632 }
7633
7634 return Found;
7635}
7636
7637static bool getFNEGPatterns(MachineInstr &Root,
7638 SmallVectorImpl<unsigned> &Patterns) {
7639 unsigned Opc = Root.getOpcode();
7640 MachineBasicBlock &MBB = *Root.getParent();
7641 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
7642
7643 auto Match = [&](unsigned Opcode, unsigned Pattern) -> bool {
7644 MachineOperand &MO = Root.getOperand(1);
7645 MachineInstr *MI = MRI.getUniqueVRegDef(MO.getReg());
7646 if (MI != nullptr && (MI->getOpcode() == Opcode) &&
7647 MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()) &&
7648 Root.getFlag(MachineInstr::MIFlag::FmContract) &&
7649 Root.getFlag(MachineInstr::MIFlag::FmNsz) &&
7650 MI->getFlag(MachineInstr::MIFlag::FmContract) &&
7651 MI->getFlag(MachineInstr::MIFlag::FmNsz)) {
7652 Patterns.push_back(Pattern);
7653 return true;
7654 }
7655 return false;
7656 };
7657
7658 switch (Opc) {
7659 default:
7660 break;
7661 case AArch64::FNEGDr:
7662 return Match(AArch64::FMADDDrrr, AArch64MachineCombinerPattern::FNMADD);
7663 case AArch64::FNEGSr:
7664 return Match(AArch64::FMADDSrrr, AArch64MachineCombinerPattern::FNMADD);
7665 }
7666
7667 return false;
7668}
7669
7670/// Return true when a code sequence can improve throughput. It
7671/// should be called only for instructions in loops.
7672/// \param Pattern - combiner pattern
7673bool AArch64InstrInfo::isThroughputPattern(unsigned Pattern) const {
7674 switch (Pattern) {
7675 default:
7676 break;
7782 return true;
7783 } // end switch (Pattern)
7784 return false;
7785}
7786
7787/// Find other MI combine patterns.
7788static bool getMiscPatterns(MachineInstr &Root,
7789 SmallVectorImpl<unsigned> &Patterns) {
7790 // A - (B + C) ==> (A - B) - C or (A - C) - B
7791 unsigned Opc = Root.getOpcode();
7792 MachineBasicBlock &MBB = *Root.getParent();
7793
7794 switch (Opc) {
7795 case AArch64::SUBWrr:
7796 case AArch64::SUBSWrr:
7797 case AArch64::SUBXrr:
7798 case AArch64::SUBSXrr:
7799 // Found candidate root.
7800 break;
7801 default:
7802 return false;
7803 }
7804
7805 if (isCombineInstrSettingFlag(Opc) &&
7806 Root.findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr, true) ==
7807 -1)
7808 return false;
7809
7810 if (canCombine(MBB, Root.getOperand(2), AArch64::ADDWrr) ||
7811 canCombine(MBB, Root.getOperand(2), AArch64::ADDSWrr) ||
7812 canCombine(MBB, Root.getOperand(2), AArch64::ADDXrr) ||
7813 canCombine(MBB, Root.getOperand(2), AArch64::ADDSXrr)) {
7814 Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP1);
7815 Patterns.push_back(AArch64MachineCombinerPattern::SUBADD_OP2);
7816 return true;
7817 }
7818
7819 return false;
7820}
7821
7822/// Check if the given instruction forms a gather load pattern that can be
7823/// optimized for better Memory-Level Parallelism (MLP). This function
7824/// identifies chains of NEON lane load instructions that load data from
7825/// different memory addresses into individual lanes of a 128-bit vector
7826/// register, then attempts to split the pattern into parallel loads to break
7827/// the serial dependency between instructions.
7828///
7829/// Pattern Matched:
7830/// Initial scalar load -> SUBREG_TO_REG (lane 0) -> LD1i* (lane 1) ->
7831/// LD1i* (lane 2) -> ... -> LD1i* (lane N-1, Root)
7832///
7833/// Transformed Into:
7834/// Two parallel vector loads using fewer lanes each, followed by ZIP1v2i64
7835/// to combine the results, enabling better memory-level parallelism.
7836///
7837/// Supported Element Types:
7838/// - 32-bit elements (LD1i32, 4 lanes total)
7839/// - 16-bit elements (LD1i16, 8 lanes total)
7840/// - 8-bit elements (LD1i8, 16 lanes total)
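///
/// Illustrative example (4 x 32-bit lanes): a serial chain such as
///   ldr s0, [x0]; ld1 {v0.s}[1], [x1]; ld1 {v0.s}[2], [x2]; ld1 {v0.s}[3], [x3]
/// is rewritten so that lanes 0-1 and lanes 2-3 are loaded into two
/// independent registers that are then combined with zip1 v0.2d, v0.2d, v1.2d.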
7841static bool getGatherLanePattern(MachineInstr &Root,
7842 SmallVectorImpl<unsigned> &Patterns,
7843 unsigned LoadLaneOpCode, unsigned NumLanes) {
7844 const MachineFunction *MF = Root.getMF();
7845
7846 // Early exit if optimizing for size.
7847 if (MF->getFunction().hasMinSize())
7848 return false;
7849
7850 const MachineRegisterInfo &MRI = MF->getRegInfo();
7851 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
7852
7853 // The root of the pattern must load into the last lane of the vector.
7854 if (Root.getOperand(2).getImm() != NumLanes - 1)
7855 return false;
7856
7857 // Check that we have loads into all lanes except lane 0.
7858 // For each load we also want to check that:
7859 // 1. It has a single non-debug use (since we will be replacing the virtual
7860 // register)
7861 // 2. That the addressing mode only uses a single pointer operand
7862 auto *CurrInstr = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
7863 auto Range = llvm::seq<unsigned>(1, NumLanes - 1);
7864 SmallSet<unsigned, 16> RemainingLanes(Range.begin(), Range.end());
7865 SmallVector<const MachineInstr *, 16> LoadInstrs;
7866 while (!RemainingLanes.empty() && CurrInstr &&
7867 CurrInstr->getOpcode() == LoadLaneOpCode &&
7868 MRI.hasOneNonDBGUse(CurrInstr->getOperand(0).getReg()) &&
7869 CurrInstr->getNumOperands() == 4) {
7870 RemainingLanes.erase(CurrInstr->getOperand(2).getImm());
7871 LoadInstrs.push_back(CurrInstr);
7872 CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
7873 }
7874
7875 // Check that we have found a match for lanes N-1.. 1.
7876 if (!RemainingLanes.empty())
7877 return false;
7878
7879 // Match the SUBREG_TO_REG sequence.
7880 if (CurrInstr->getOpcode() != TargetOpcode::SUBREG_TO_REG)
7881 return false;
7882
7883 // Verify that the subreg to reg loads an integer into the first lane.
7884 auto Lane0LoadReg = CurrInstr->getOperand(2).getReg();
7885 unsigned SingleLaneSizeInBits = 128 / NumLanes;
7886 if (TRI->getRegSizeInBits(Lane0LoadReg, MRI) != SingleLaneSizeInBits)
7887 return false;
7888
7889 // Verify that it also has a single non-debug use.
7890 if (!MRI.hasOneNonDBGUse(Lane0LoadReg))
7891 return false;
7892
7893 LoadInstrs.push_back(MRI.getUniqueVRegDef(Lane0LoadReg));
7894
7895 // If there is any chance of aliasing, do not apply the pattern.
7896 // Walk backward through the MBB starting from Root.
7897 // Exit early if we've encountered all load instructions or hit the search
7898 // limit.
7899 auto MBBItr = Root.getIterator();
7900 unsigned RemainingSteps = GatherOptSearchLimit;
7901 SmallPtrSet<const MachineInstr *, 16> RemainingLoadInstrs;
7902 RemainingLoadInstrs.insert(LoadInstrs.begin(), LoadInstrs.end());
7903 const MachineBasicBlock *MBB = Root.getParent();
7904
7905 for (; MBBItr != MBB->begin() && RemainingSteps > 0 &&
7906 !RemainingLoadInstrs.empty();
7907 --MBBItr, --RemainingSteps) {
7908 const MachineInstr &CurrInstr = *MBBItr;
7909
7910 // Remove this instruction from remaining loads if it's one we're tracking.
7911 RemainingLoadInstrs.erase(&CurrInstr);
7912
7913 // Check for potential aliasing with any of the load instructions to
7914 // optimize.
7915 if (CurrInstr.isLoadFoldBarrier())
7916 return false;
7917 }
7918
7919 // If we hit the search limit without finding all load instructions,
7920 // don't match the pattern.
7921 if (RemainingSteps == 0 && !RemainingLoadInstrs.empty())
7922 return false;
7923
7924 switch (NumLanes) {
7925 case 4:
7926 Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i32);
7927 break;
7928 case 8:
7929 Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i16);
7930 break;
7931 case 16:
7932 Patterns.push_back(AArch64MachineCombinerPattern::GATHER_LANE_i8);
7933 break;
7934 default:
7935 llvm_unreachable("Got bad number of lanes for gather pattern.");
7936 }
7937
7938 return true;
7939}
7940
7941/// Search for patterns of LD instructions we can optimize.
7942static bool getLoadPatterns(MachineInstr &Root,
7943 SmallVectorImpl<unsigned> &Patterns) {
7944
7945 // The pattern searches for loads into single lanes.
7946 switch (Root.getOpcode()) {
7947 case AArch64::LD1i32:
7948 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 4);
7949 case AArch64::LD1i16:
7950 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 8);
7951 case AArch64::LD1i8:
7952 return getGatherLanePattern(Root, Patterns, Root.getOpcode(), 16);
7953 default:
7954 return false;
7955 }
7956}
7957
7958/// Generate optimized instruction sequence for gather load patterns to improve
7959/// Memory-Level Parallelism (MLP). This function transforms a chain of
7960/// sequential NEON lane loads into parallel vector loads that can execute
7961/// concurrently.
7962static void
7966 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
7967 unsigned Pattern, unsigned NumLanes) {
7968 MachineFunction &MF = *Root.getParent()->getParent();
7969 MachineRegisterInfo &MRI = MF.getRegInfo();
7970 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
7971
7972 // Gather the initial load instructions to build the pattern.
7973 SmallVector<MachineInstr *, 16> LoadToLaneInstrs;
7974 MachineInstr *CurrInstr = &Root;
7975 for (unsigned i = 0; i < NumLanes - 1; ++i) {
7976 LoadToLaneInstrs.push_back(CurrInstr);
7977 CurrInstr = MRI.getUniqueVRegDef(CurrInstr->getOperand(1).getReg());
7978 }
7979
7980 // Sort the load instructions according to the lane.
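 // (The comparator sorts in descending lane order; the list is reversed below
 // to obtain the ascending order used when rebuilding the two load chains.)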
7981 llvm::sort(LoadToLaneInstrs,
7982 [](const MachineInstr *A, const MachineInstr *B) {
7983 return A->getOperand(2).getImm() > B->getOperand(2).getImm();
7984 });
7985
7986 MachineInstr *SubregToReg = CurrInstr;
7987 LoadToLaneInstrs.push_back(
7988 MRI.getUniqueVRegDef(SubregToReg->getOperand(2).getReg()));
7989 auto LoadToLaneInstrsAscending = llvm::reverse(LoadToLaneInstrs);
7990
7991 const TargetRegisterClass *FPR128RegClass =
7992 MRI.getRegClass(Root.getOperand(0).getReg());
7993
7994 // Helper lambda to create a LD1 instruction.
7995 auto CreateLD1Instruction = [&](MachineInstr *OriginalInstr,
7996 Register SrcRegister, unsigned Lane,
7997 Register OffsetRegister,
7998 bool OffsetRegisterKillState) {
7999 auto NewRegister = MRI.createVirtualRegister(FPR128RegClass);
8000 MachineInstrBuilder LoadIndexIntoRegister =
8001 BuildMI(MF, MIMetadata(*OriginalInstr), TII->get(Root.getOpcode()),
8002 NewRegister)
8003 .addReg(SrcRegister)
8004 .addImm(Lane)
8005 .addReg(OffsetRegister, getKillRegState(OffsetRegisterKillState));
8006 InstrIdxForVirtReg.insert(std::make_pair(NewRegister, InsInstrs.size()));
8007 InsInstrs.push_back(LoadIndexIntoRegister);
8008 return NewRegister;
8009 };
8010
8011 // Helper to create load instruction based on the NumLanes in the NEON
8012 // register we are rewriting.
8013 auto CreateLDRInstruction = [&](unsigned NumLanes, Register DestReg,
8014 Register OffsetReg,
8015 bool KillState) -> MachineInstrBuilder {
8016 unsigned Opcode;
8017 switch (NumLanes) {
8018 case 4:
8019 Opcode = AArch64::LDRSui;
8020 break;
8021 case 8:
8022 Opcode = AArch64::LDRHui;
8023 break;
8024 case 16:
8025 Opcode = AArch64::LDRBui;
8026 break;
8027 default:
8028 llvm_unreachable(
8029 "Got unsupported number of lanes in machine-combiner gather pattern");
8030 }
8031 // Immediate offset load
8032 return BuildMI(MF, MIMetadata(Root), TII->get(Opcode), DestReg)
8033 .addReg(OffsetReg)
8034 .addImm(0);
8035 };
8036
8037 // Load the remaining lanes into register 0.
8038 auto LanesToLoadToReg0 =
8039 llvm::make_range(LoadToLaneInstrsAscending.begin() + 1,
8040 LoadToLaneInstrsAscending.begin() + NumLanes / 2);
8041 Register PrevReg = SubregToReg->getOperand(0).getReg();
8042 for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg0)) {
8043 const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
8044 PrevReg = CreateLD1Instruction(LoadInstr, PrevReg, Index + 1,
8045 OffsetRegOperand.getReg(),
8046 OffsetRegOperand.isKill());
8047 DelInstrs.push_back(LoadInstr);
8048 }
8049 Register LastLoadReg0 = PrevReg;
8050
8051 // First load into register 1. Perform an integer load to zero out the upper
8052 // lanes in a single instruction.
8053 MachineInstr *Lane0Load = *LoadToLaneInstrsAscending.begin();
8054 MachineInstr *OriginalSplitLoad =
8055 *std::next(LoadToLaneInstrsAscending.begin(), NumLanes / 2);
8056 Register DestRegForMiddleIndex = MRI.createVirtualRegister(
8057 MRI.getRegClass(Lane0Load->getOperand(0).getReg()));
8058
8059 const MachineOperand &OriginalSplitToLoadOffsetOperand =
8060 OriginalSplitLoad->getOperand(3);
8061 MachineInstrBuilder MiddleIndexLoadInstr =
8062 CreateLDRInstruction(NumLanes, DestRegForMiddleIndex,
8063 OriginalSplitToLoadOffsetOperand.getReg(),
8064 OriginalSplitToLoadOffsetOperand.isKill());
8065
8066 InstrIdxForVirtReg.insert(
8067 std::make_pair(DestRegForMiddleIndex, InsInstrs.size()));
8068 InsInstrs.push_back(MiddleIndexLoadInstr);
8069 DelInstrs.push_back(OriginalSplitLoad);
8070
8071 // Subreg To Reg instruction for register 1.
8072 Register DestRegForSubregToReg = MRI.createVirtualRegister(FPR128RegClass);
8073 unsigned SubregType;
8074 switch (NumLanes) {
8075 case 4:
8076 SubregType = AArch64::ssub;
8077 break;
8078 case 8:
8079 SubregType = AArch64::hsub;
8080 break;
8081 case 16:
8082 SubregType = AArch64::bsub;
8083 break;
8084 default:
8085 llvm_unreachable(
8086 "Got invalid NumLanes for machine-combiner gather pattern");
8087 }
8088
8089 auto SubRegToRegInstr =
8090 BuildMI(MF, MIMetadata(Root), TII->get(SubregToReg->getOpcode()),
8091 DestRegForSubregToReg)
8092 .addImm(0)
8093 .addReg(DestRegForMiddleIndex, getKillRegState(true))
8094 .addImm(SubregType);
8095 InstrIdxForVirtReg.insert(
8096 std::make_pair(DestRegForSubregToReg, InsInstrs.size()));
8097 InsInstrs.push_back(SubRegToRegInstr);
8098
8099 // Load remaining lanes into register 1.
8100 auto LanesToLoadToReg1 =
8101 llvm::make_range(LoadToLaneInstrsAscending.begin() + NumLanes / 2 + 1,
8102 LoadToLaneInstrsAscending.end());
8103 PrevReg = SubRegToRegInstr->getOperand(0).getReg();
8104 for (auto [Index, LoadInstr] : llvm::enumerate(LanesToLoadToReg1)) {
8105 const MachineOperand &OffsetRegOperand = LoadInstr->getOperand(3);
8106 PrevReg = CreateLD1Instruction(LoadInstr, PrevReg, Index + 1,
8107 OffsetRegOperand.getReg(),
8108 OffsetRegOperand.isKill());
8109
8110 // Do not add the last reg to DelInstrs - it will be removed later.
8111 if (Index == NumLanes / 2 - 2) {
8112 break;
8113 }
8114 DelInstrs.push_back(LoadInstr);
8115 }
8116 Register LastLoadReg1 = PrevReg;
8117
8118 // Create the final zip instruction to combine the results.
8119 MachineInstrBuilder ZipInstr =
8120 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::ZIP1v2i64),
8121 Root.getOperand(0).getReg())
8122 .addReg(LastLoadReg0)
8123 .addReg(LastLoadReg1);
8124 InsInstrs.push_back(ZipInstr);
8125}
8126
8140
8141/// Return true when there is potentially a faster code sequence for an
8142/// instruction chain ending in \p Root. All potential patterns are listed in
8143/// the \p Pattern vector. Pattern should be sorted in priority order since the
8144/// pattern evaluator stops checking as soon as it finds a faster sequence.
8145
8146bool AArch64InstrInfo::getMachineCombinerPatterns(
8147 MachineInstr &Root, SmallVectorImpl<unsigned> &Patterns,
8148 bool DoRegPressureReduce) const {
8149 // Integer patterns
8150 if (getMaddPatterns(Root, Patterns))
8151 return true;
8152 // Floating point patterns
8153 if (getFMULPatterns(Root, Patterns))
8154 return true;
8155 if (getFMAPatterns(Root, Patterns))
8156 return true;
8157 if (getFNEGPatterns(Root, Patterns))
8158 return true;
8159
8160 // Other patterns
8161 if (getMiscPatterns(Root, Patterns))
8162 return true;
8163
8164 // Load patterns
8165 if (getLoadPatterns(Root, Patterns))
8166 return true;
8167
8168 return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns,
8169 DoRegPressureReduce);
8170}
8171
8172enum class FMAInstKind { Default, Indexed, Accumulator };
8173/// genFusedMultiply - Generate fused multiply instructions.
8174/// This function supports both integer and floating point instructions.
8175/// A typical example:
8176/// F|MUL I=A,B,0
8177/// F|ADD R,I,C
8178/// ==> F|MADD R,A,B,C
8179/// \param MF Containing MachineFunction
8180/// \param MRI Register information
8181/// \param TII Target information
8182/// \param Root is the F|ADD instruction
8183/// \param [out] InsInstrs is a vector of machine instructions and will
8184/// contain the generated madd instruction
8185/// \param IdxMulOpd is index of operand in Root that is the result of
8186/// the F|MUL. In the example above IdxMulOpd is 1.
8187/// \param MaddOpc the opcode of the f|madd instruction
8188/// \param RC Register class of operands
8189/// \param kind of fma instruction (addressing mode) to be generated
8190/// \param ReplacedAddend is the result register from the instruction
8191/// replacing the non-combined operand, if any.
8192static MachineInstr *
8194 const TargetInstrInfo *TII, MachineInstr &Root,
8195 SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
8196 unsigned MaddOpc, const TargetRegisterClass *RC,
8197 FMAInstKind kind = FMAInstKind::Default,
8198 const Register *ReplacedAddend = nullptr) {
8199 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
8200
8201 unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
8202 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
8203 Register ResultReg = Root.getOperand(0).getReg();
8204 Register SrcReg0 = MUL->getOperand(1).getReg();
8205 bool Src0IsKill = MUL->getOperand(1).isKill();
8206 Register SrcReg1 = MUL->getOperand(2).getReg();
8207 bool Src1IsKill = MUL->getOperand(2).isKill();
8208
8209 Register SrcReg2;
8210 bool Src2IsKill;
8211 if (ReplacedAddend) {
8212 // If we just generated a new addend, we must be its only use.
8213 SrcReg2 = *ReplacedAddend;
8214 Src2IsKill = true;
8215 } else {
8216 SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
8217 Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
8218 }
8219
8220 if (ResultReg.isVirtual())
8221 MRI.constrainRegClass(ResultReg, RC);
8222 if (SrcReg0.isVirtual())
8223 MRI.constrainRegClass(SrcReg0, RC);
8224 if (SrcReg1.isVirtual())
8225 MRI.constrainRegClass(SrcReg1, RC);
8226 if (SrcReg2.isVirtual())
8227 MRI.constrainRegClass(SrcReg2, RC);
8228
8229 MachineInstrBuilder MIB;
8230 if (kind == FMAInstKind::Default)
8231 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8232 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8233 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8234 .addReg(SrcReg2, getKillRegState(Src2IsKill));
8235 else if (kind == FMAInstKind::Indexed)
8236 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8237 .addReg(SrcReg2, getKillRegState(Src2IsKill))
8238 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8239 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8240 .addImm(MUL->getOperand(3).getImm());
8241 else if (kind == FMAInstKind::Accumulator)
8242 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8243 .addReg(SrcReg2, getKillRegState(Src2IsKill))
8244 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8245 .addReg(SrcReg1, getKillRegState(Src1IsKill));
8246 else
8247 assert(false && "Invalid FMA instruction kind \n");
8248 // Insert the MADD (MADD, FMA, FMS, FMLA, FMSL)
8249 InsInstrs.push_back(MIB);
8250 return MUL;
8251}
8252
8253static MachineInstr *
8254genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI,
8255 const TargetInstrInfo *TII, MachineInstr &Root,
8256 SmallVectorImpl<MachineInstr *> &InsInstrs) {
8257 MachineInstr *MAD = MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
8258
8259 unsigned Opc = 0;
8260 const TargetRegisterClass *RC = MRI.getRegClass(MAD->getOperand(0).getReg());
8261 if (AArch64::FPR32RegClass.hasSubClassEq(RC))
8262 Opc = AArch64::FNMADDSrrr;
8263 else if (AArch64::FPR64RegClass.hasSubClassEq(RC))
8264 Opc = AArch64::FNMADDDrrr;
8265 else
8266 return nullptr;
8267
8268 Register ResultReg = Root.getOperand(0).getReg();
8269 Register SrcReg0 = MAD->getOperand(1).getReg();
8270 Register SrcReg1 = MAD->getOperand(2).getReg();
8271 Register SrcReg2 = MAD->getOperand(3).getReg();
8272 bool Src0IsKill = MAD->getOperand(1).isKill();
8273 bool Src1IsKill = MAD->getOperand(2).isKill();
8274 bool Src2IsKill = MAD->getOperand(3).isKill();
8275 if (ResultReg.isVirtual())
8276 MRI.constrainRegClass(ResultReg, RC);
8277 if (SrcReg0.isVirtual())
8278 MRI.constrainRegClass(SrcReg0, RC);
8279 if (SrcReg1.isVirtual())
8280 MRI.constrainRegClass(SrcReg1, RC);
8281 if (SrcReg2.isVirtual())
8282 MRI.constrainRegClass(SrcReg2, RC);
8283
8284 MachineInstrBuilder MIB =
8285 BuildMI(MF, MIMetadata(Root), TII->get(Opc), ResultReg)
8286 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8287 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8288 .addReg(SrcReg2, getKillRegState(Src2IsKill));
8289 InsInstrs.push_back(MIB);
8290
8291 return MAD;
8292}
8293
8294/// Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
8295static MachineInstr *
8296genIndexedMultiply(MachineInstr &Root,
8297 SmallVectorImpl<MachineInstr *> &InsInstrs,
8298 unsigned IdxDupOp, unsigned MulOpc,
8299 const TargetRegisterClass *RC, MachineRegisterInfo &MRI) {
8300 assert(((IdxDupOp == 1) || (IdxDupOp == 2)) &&
8301 "Invalid index of FMUL operand");
8302
8303 MachineFunction &MF = *Root.getMF();
8304 MachineRegisterInfo &MRI = MF.getRegInfo();
8305
8306 MachineInstr *Dup =
8307 MF.getRegInfo().getUniqueVRegDef(Root.getOperand(IdxDupOp).getReg());
8308
8309 if (Dup->getOpcode() == TargetOpcode::COPY)
8310 Dup = MRI.getUniqueVRegDef(Dup->getOperand(1).getReg());
8311
8312 Register DupSrcReg = Dup->getOperand(1).getReg();
8313 MRI.clearKillFlags(DupSrcReg);
8314 MRI.constrainRegClass(DupSrcReg, RC);
8315
8316 unsigned DupSrcLane = Dup->getOperand(2).getImm();
8317
8318 unsigned IdxMulOp = IdxDupOp == 1 ? 2 : 1;
8319 MachineOperand &MulOp = Root.getOperand(IdxMulOp);
8320
8321 Register ResultReg = Root.getOperand(0).getReg();
8322
8323 MachineInstrBuilder MIB;
8324 MIB = BuildMI(MF, MIMetadata(Root), TII->get(MulOpc), ResultReg)
8325 .add(MulOp)
8326 .addReg(DupSrcReg)
8327 .addImm(DupSrcLane);
8328
8329 InsInstrs.push_back(MIB);
8330 return &Root;
8331}
8332
8333/// genFusedMultiplyAcc - Helper to generate fused multiply accumulate
8334/// instructions.
8335///
8336/// \see genFusedMultiply
8337static MachineInstr *genFusedMultiplyAcc(
8338 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
8339 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
8340 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
8341 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8342 FMAInstKind::Accumulator);
8343}
8344
8345/// genNeg - Helper to generate an intermediate negation of the second operand
8346/// of Root
8347static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI,
8348 const TargetInstrInfo *TII, MachineInstr &Root,
8350 DenseMap<Register, unsigned> &InstrIdxForVirtReg,
8351 unsigned MnegOpc, const TargetRegisterClass *RC) {
8352 Register NewVR = MRI.createVirtualRegister(RC);
8353 MachineInstrBuilder MIB =
8354 BuildMI(MF, MIMetadata(Root), TII->get(MnegOpc), NewVR)
8355 .add(Root.getOperand(2));
8356 InsInstrs.push_back(MIB);
8357
8358 assert(InstrIdxForVirtReg.empty());
8359 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8360
8361 return NewVR;
8362}
8363
8364/// genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate
8365/// instructions with an additional negation of the accumulator
8366static MachineInstr *genFusedMultiplyAccNeg(
8367 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
8368 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
8369 DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
8370 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
8371 assert(IdxMulOpd == 1);
8372
8373 Register NewVR =
8374 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
8375 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8376 FMAInstKind::Accumulator, &NewVR);
8377}
8378
8379/// genFusedMultiplyIdx - Helper to generate fused multiply accumulate
8380/// instructions.
8381///
8382/// \see genFusedMultiply
8383static MachineInstr *genFusedMultiplyIdx(
8384 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
8385 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
8386 unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC) {
8387 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8388 FMAInstKind::Indexed);
8389}
8390
8391/// genFusedMultiplyIdxNeg - Helper to generate fused multiply indexed
8392/// instructions with an additional negation of the accumulator
8393static MachineInstr *genFusedMultiplyIdxNeg(
8394 MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII,
8395 MachineInstr &Root, SmallVectorImpl<MachineInstr *> &InsInstrs,
8396 DenseMap<Register, unsigned> &InstrIdxForVirtReg, unsigned IdxMulOpd,
8397 unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC) {
8398 assert(IdxMulOpd == 1);
8399
8400 Register NewVR =
8401 genNeg(MF, MRI, TII, Root, InsInstrs, InstrIdxForVirtReg, MnegOpc, RC);
8402
8403 return genFusedMultiply(MF, MRI, TII, Root, InsInstrs, IdxMulOpd, MaddOpc, RC,
8404 FMAInstKind::Indexed, &NewVR);
8405}
8406
8407/// genMaddR - Generate madd instruction and combine mul and add using
8408/// an extra virtual register
8409/// Example - an ADD intermediate needs to be stored in a register:
8410/// MUL I=A,B,0
8411/// ADD R,I,Imm
8412/// ==> ORR V, ZR, Imm
8413/// ==> MADD R,A,B,V
8414/// \param MF Containing MachineFunction
8415/// \param MRI Register information
8416/// \param TII Target information
8417/// \param Root is the ADD instruction
8418/// \param [out] InsInstrs is a vector of machine instructions and will
8419/// contain the generated madd instruction
8420/// \param IdxMulOpd is index of operand in Root that is the result of
8421/// the MUL. In the example above IdxMulOpd is 1.
8422/// \param MaddOpc the opcode of the madd instruction
8423/// \param VR is a virtual register that holds the value of an ADD operand
8424/// (V in the example above).
8425/// \param RC Register class of operands
8426static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
8427 const TargetInstrInfo *TII, MachineInstr &Root,
8429 unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
8430 const TargetRegisterClass *RC) {
8431 assert(IdxMulOpd == 1 || IdxMulOpd == 2);
8432
8433 MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
8434 Register ResultReg = Root.getOperand(0).getReg();
8435 Register SrcReg0 = MUL->getOperand(1).getReg();
8436 bool Src0IsKill = MUL->getOperand(1).isKill();
8437 Register SrcReg1 = MUL->getOperand(2).getReg();
8438 bool Src1IsKill = MUL->getOperand(2).isKill();
8439
8440 if (ResultReg.isVirtual())
8441 MRI.constrainRegClass(ResultReg, RC);
8442 if (SrcReg0.isVirtual())
8443 MRI.constrainRegClass(SrcReg0, RC);
8444 if (SrcReg1.isVirtual())
8445 MRI.constrainRegClass(SrcReg1, RC);
8446 if (Register(VR).isVirtual())
8447 MRI.constrainRegClass(VR, RC);
8448
8449 MachineInstrBuilder MIB =
8450 BuildMI(MF, MIMetadata(Root), TII->get(MaddOpc), ResultReg)
8451 .addReg(SrcReg0, getKillRegState(Src0IsKill))
8452 .addReg(SrcReg1, getKillRegState(Src1IsKill))
8453 .addReg(VR);
8454 // Insert the MADD
8455 InsInstrs.push_back(MIB);
8456 return MUL;
8457}
8458
8459/// Do the following transformation
8460/// A - (B + C) ==> (A - B) - C
8461/// A - (B + C) ==> (A - C) - B
8462static void genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI,
8463 const TargetInstrInfo *TII, MachineInstr &Root,
8464 SmallVectorImpl<MachineInstr *> &InsInstrs,
8465 SmallVectorImpl<MachineInstr *> &DelInstrs,
8466 unsigned IdxOpd1,
8467 DenseMap<Register, unsigned> &InstrIdxForVirtReg) {
8468 assert(IdxOpd1 == 1 || IdxOpd1 == 2);
8469 unsigned IdxOtherOpd = IdxOpd1 == 1 ? 2 : 1;
8470 MachineInstr *AddMI = MRI.getUniqueVRegDef(Root.getOperand(2).getReg());
8471
8472 Register ResultReg = Root.getOperand(0).getReg();
8473 Register RegA = Root.getOperand(1).getReg();
8474 bool RegAIsKill = Root.getOperand(1).isKill();
8475 Register RegB = AddMI->getOperand(IdxOpd1).getReg();
8476 bool RegBIsKill = AddMI->getOperand(IdxOpd1).isKill();
8477 Register RegC = AddMI->getOperand(IdxOtherOpd).getReg();
8478 bool RegCIsKill = AddMI->getOperand(IdxOtherOpd).isKill();
8479 Register NewVR =
8480 MRI.createVirtualRegister(MRI.getRegClass(Root.getOperand(2).getReg()));
8481
8482 unsigned Opcode = Root.getOpcode();
8483 if (Opcode == AArch64::SUBSWrr)
8484 Opcode = AArch64::SUBWrr;
8485 else if (Opcode == AArch64::SUBSXrr)
8486 Opcode = AArch64::SUBXrr;
8487 else
8488 assert((Opcode == AArch64::SUBWrr || Opcode == AArch64::SUBXrr) &&
8489 "Unexpected instruction opcode.");
8490
8491 uint32_t Flags = Root.mergeFlagsWith(*AddMI);
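 // The reassociated form may wrap differently (the intermediate A - B can
 // overflow even when A - (B + C) does not), so the nsw/nuw flags are dropped.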
8492 Flags &= ~MachineInstr::NoSWrap;
8493 Flags &= ~MachineInstr::NoUWrap;
8494
8495 MachineInstrBuilder MIB1 =
8496 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), NewVR)
8497 .addReg(RegA, getKillRegState(RegAIsKill))
8498 .addReg(RegB, getKillRegState(RegBIsKill))
8499 .setMIFlags(Flags);
8500 MachineInstrBuilder MIB2 =
8501 BuildMI(MF, MIMetadata(Root), TII->get(Opcode), ResultReg)
8502 .addReg(NewVR, getKillRegState(true))
8503 .addReg(RegC, getKillRegState(RegCIsKill))
8504 .setMIFlags(Flags);
8505
8506 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8507 InsInstrs.push_back(MIB1);
8508 InsInstrs.push_back(MIB2);
8509 DelInstrs.push_back(AddMI);
8510 DelInstrs.push_back(&Root);
8511}
8512
8513unsigned AArch64InstrInfo::getReduceOpcodeForAccumulator(
8514 unsigned int AccumulatorOpCode) const {
8515 switch (AccumulatorOpCode) {
8516 case AArch64::UABALB_ZZZ_D:
8517 case AArch64::SABALB_ZZZ_D:
8518 case AArch64::UABALT_ZZZ_D:
8519 case AArch64::SABALT_ZZZ_D:
8520 return AArch64::ADD_ZZZ_D;
8521 case AArch64::UABALB_ZZZ_H:
8522 case AArch64::SABALB_ZZZ_H:
8523 case AArch64::UABALT_ZZZ_H:
8524 case AArch64::SABALT_ZZZ_H:
8525 return AArch64::ADD_ZZZ_H;
8526 case AArch64::UABALB_ZZZ_S:
8527 case AArch64::SABALB_ZZZ_S:
8528 case AArch64::UABALT_ZZZ_S:
8529 case AArch64::SABALT_ZZZ_S:
8530 return AArch64::ADD_ZZZ_S;
8531 case AArch64::UABALv16i8_v8i16:
8532 case AArch64::SABALv8i8_v8i16:
8533 case AArch64::SABAv8i16:
8534 case AArch64::UABAv8i16:
8535 return AArch64::ADDv8i16;
8536 case AArch64::SABALv2i32_v2i64:
8537 case AArch64::UABALv2i32_v2i64:
8538 case AArch64::SABALv4i32_v2i64:
8539 return AArch64::ADDv2i64;
8540 case AArch64::UABALv4i16_v4i32:
8541 case AArch64::SABALv4i16_v4i32:
8542 case AArch64::SABALv8i16_v4i32:
8543 case AArch64::SABAv4i32:
8544 case AArch64::UABAv4i32:
8545 return AArch64::ADDv4i32;
8546 case AArch64::UABALv4i32_v2i64:
8547 return AArch64::ADDv2i64;
8548 case AArch64::UABALv8i16_v4i32:
8549 return AArch64::ADDv4i32;
8550 case AArch64::UABALv8i8_v8i16:
8551 case AArch64::SABALv16i8_v8i16:
8552 return AArch64::ADDv8i16;
8553 case AArch64::UABAv16i8:
8554 case AArch64::SABAv16i8:
8555 return AArch64::ADDv16i8;
8556 case AArch64::UABAv4i16:
8557 case AArch64::SABAv4i16:
8558 return AArch64::ADDv4i16;
8559 case AArch64::UABAv2i32:
8560 case AArch64::SABAv2i32:
8561 return AArch64::ADDv2i32;
8562 case AArch64::UABAv8i8:
8563 case AArch64::SABAv8i8:
8564 return AArch64::ADDv8i8;
8565 default:
8566 llvm_unreachable("Unknown accumulator opcode");
8567 }
8568}
8569
8570/// When getMachineCombinerPatterns() finds potential patterns,
8571/// this function generates the instructions that could replace the
8572/// original code sequence
8573void AArch64InstrInfo::genAlternativeCodeSequence(
8574 MachineInstr &Root, unsigned Pattern,
8577 DenseMap<Register, unsigned> &InstrIdxForVirtReg) const {
8578 MachineBasicBlock &MBB = *Root.getParent();
8579 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
8580 MachineFunction &MF = *MBB.getParent();
8581 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
8582
8583 MachineInstr *MUL = nullptr;
8584 const TargetRegisterClass *RC;
8585 unsigned Opc;
8586 switch (Pattern) {
8587 default:
8588 // Reassociate instructions.
8589 TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
8590 DelInstrs, InstrIdxForVirtReg);
8591 return;
8592 case AArch64MachineCombinerPattern::SUBADD_OP1:
8593 // A - (B + C)
8594 // ==> (A - B) - C
8595 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 1,
8596 InstrIdxForVirtReg);
8597 return;
8598 case AArch64MachineCombinerPattern::SUBADD_OP2:
8599 // A - (B + C)
8600 // ==> (A - C) - B
8601 genSubAdd2SubSub(MF, MRI, TII, Root, InsInstrs, DelInstrs, 2,
8602 InstrIdxForVirtReg);
8603 return;
8604 case AArch64MachineCombinerPattern::MULADDW_OP1:
8605 case AArch64MachineCombinerPattern::MULADDX_OP1:
8606 // MUL I=A,B,0
8607 // ADD R,I,C
8608 // ==> MADD R,A,B,C
8609 // --- Create(MADD);
8610 if (Pattern == AArch64MachineCombinerPattern::MULADDW_OP1) {
8611 Opc = AArch64::MADDWrrr;
8612 RC = &AArch64::GPR32RegClass;
8613 } else {
8614 Opc = AArch64::MADDXrrr;
8615 RC = &AArch64::GPR64RegClass;
8616 }
8617 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8618 break;
8619 case AArch64MachineCombinerPattern::MULADDW_OP2:
8620 case AArch64MachineCombinerPattern::MULADDX_OP2:
8621 // MUL I=A,B,0
8622 // ADD R,C,I
8623 // ==> MADD R,A,B,C
8624 // --- Create(MADD);
8625 if (Pattern == AArch64MachineCombinerPattern::MULADDW_OP2) {
8626 Opc = AArch64::MADDWrrr;
8627 RC = &AArch64::GPR32RegClass;
8628 } else {
8629 Opc = AArch64::MADDXrrr;
8630 RC = &AArch64::GPR64RegClass;
8631 }
8632 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8633 break;
8634 case AArch64MachineCombinerPattern::MULADDWI_OP1:
8635 case AArch64MachineCombinerPattern::MULADDXI_OP1:
8636 case AArch64MachineCombinerPattern::MULSUBWI_OP1:
8637 case AArch64MachineCombinerPattern::MULSUBXI_OP1: {
8638 // MUL I=A,B,0
8639 // ADD/SUB R,I,Imm
8640 // ==> MOV V, Imm/-Imm
8641 // ==> MADD R,A,B,V
8642 // --- Create(MADD);
8643 const TargetRegisterClass *RC;
8644 unsigned BitSize, MovImm;
8645 if (Pattern == AArch64MachineCombinerPattern::MULADDWI_OP1 ||
8646 Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1) {
8647 MovImm = AArch64::MOVi32imm;
8648 RC = &AArch64::GPR32spRegClass;
8649 BitSize = 32;
8650 Opc = AArch64::MADDWrrr;
8651 RC = &AArch64::GPR32RegClass;
8652 } else {
8653 MovImm = AArch64::MOVi64imm;
8654 RC = &AArch64::GPR64spRegClass;
8655 BitSize = 64;
8656 Opc = AArch64::MADDXrrr;
8657 RC = &AArch64::GPR64RegClass;
8658 }
8659 Register NewVR = MRI.createVirtualRegister(RC);
8660 uint64_t Imm = Root.getOperand(2).getImm();
8661
8662 if (Root.getOperand(3).isImm()) {
8663 unsigned Val = Root.getOperand(3).getImm();
8664 Imm = Imm << Val;
8665 }
8666 bool IsSub = Pattern == AArch64MachineCombinerPattern::MULSUBWI_OP1 ||
8668 uint64_t UImm = SignExtend64(IsSub ? -Imm : Imm, BitSize);
8669 // Check that the immediate can be composed via a single instruction.
8670 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
8671 AArch64_IMM::expandMOVImm(UImm, BitSize, Insn);
8672 if (Insn.size() != 1)
8673 return;
8674 MachineInstrBuilder MIB1 =
8675 BuildMI(MF, MIMetadata(Root), TII->get(MovImm), NewVR)
8676 .addImm(IsSub ? -Imm : Imm);
8677 InsInstrs.push_back(MIB1);
8678 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8679 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
8680 break;
8681 }
8682 case AArch64MachineCombinerPattern::MULSUBW_OP1:
8683 case AArch64MachineCombinerPattern::MULSUBX_OP1: {
8684 // MUL I=A,B,0
8685 // SUB R,I, C
8686 // ==> SUB V, 0, C
8687 // ==> MADD R,A,B,V // = -C + A*B
8688 // --- Create(MADD);
8689 const TargetRegisterClass *SubRC;
8690 unsigned SubOpc, ZeroReg;
8691 if (Pattern == AArch64MachineCombinerPattern::MULSUBW_OP1) {
8692 SubOpc = AArch64::SUBWrr;
8693 SubRC = &AArch64::GPR32spRegClass;
8694 ZeroReg = AArch64::WZR;
8695 Opc = AArch64::MADDWrrr;
8696 RC = &AArch64::GPR32RegClass;
8697 } else {
8698 SubOpc = AArch64::SUBXrr;
8699 SubRC = &AArch64::GPR64spRegClass;
8700 ZeroReg = AArch64::XZR;
8701 Opc = AArch64::MADDXrrr;
8702 RC = &AArch64::GPR64RegClass;
8703 }
8704 Register NewVR = MRI.createVirtualRegister(SubRC);
8705 // SUB NewVR, 0, C
8706 MachineInstrBuilder MIB1 =
8707 BuildMI(MF, MIMetadata(Root), TII->get(SubOpc), NewVR)
8708 .addReg(ZeroReg)
8709 .add(Root.getOperand(2));
8710 InsInstrs.push_back(MIB1);
8711 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
8712 MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
8713 break;
8714 }
8715 case AArch64MachineCombinerPattern::MULSUBW_OP2:
8716 case AArch64MachineCombinerPattern::MULSUBX_OP2:
8717 // MUL I=A,B,0
8718 // SUB R,C,I
8719 // ==> MSUB R,A,B,C (computes C - A*B)
8720 // --- Create(MSUB);
8721 if (Pattern == AArch64MachineCombinerPattern::MULSUBW_OP2) {
8722 Opc = AArch64::MSUBWrrr;
8723 RC = &AArch64::GPR32RegClass;
8724 } else {
8725 Opc = AArch64::MSUBXrrr;
8726 RC = &AArch64::GPR64RegClass;
8727 }
8728 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8729 break;
8731 Opc = AArch64::MLAv8i8;
8732 RC = &AArch64::FPR64RegClass;
8733 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8734 break;
8736 Opc = AArch64::MLAv8i8;
8737 RC = &AArch64::FPR64RegClass;
8738 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8739 break;
8741 Opc = AArch64::MLAv16i8;
8742 RC = &AArch64::FPR128RegClass;
8743 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8744 break;
8746 Opc = AArch64::MLAv16i8;
8747 RC = &AArch64::FPR128RegClass;
8748 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8749 break;
8751 Opc = AArch64::MLAv4i16;
8752 RC = &AArch64::FPR64RegClass;
8753 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8754 break;
8756 Opc = AArch64::MLAv4i16;
8757 RC = &AArch64::FPR64RegClass;
8758 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8759 break;
8761 Opc = AArch64::MLAv8i16;
8762 RC = &AArch64::FPR128RegClass;
8763 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8764 break;
8766 Opc = AArch64::MLAv8i16;
8767 RC = &AArch64::FPR128RegClass;
8768 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8769 break;
8771 Opc = AArch64::MLAv2i32;
8772 RC = &AArch64::FPR64RegClass;
8773 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8774 break;
8776 Opc = AArch64::MLAv2i32;
8777 RC = &AArch64::FPR64RegClass;
8778 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8779 break;
8781 Opc = AArch64::MLAv4i32;
8782 RC = &AArch64::FPR128RegClass;
8783 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8784 break;
8786 Opc = AArch64::MLAv4i32;
8787 RC = &AArch64::FPR128RegClass;
8788 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8789 break;
8790
8792 Opc = AArch64::MLAv8i8;
8793 RC = &AArch64::FPR64RegClass;
8794 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8795 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i8,
8796 RC);
8797 break;
8799 Opc = AArch64::MLSv8i8;
8800 RC = &AArch64::FPR64RegClass;
8801 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8802 break;
8804 Opc = AArch64::MLAv16i8;
8805 RC = &AArch64::FPR128RegClass;
8806 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8807 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv16i8,
8808 RC);
8809 break;
8811 Opc = AArch64::MLSv16i8;
8812 RC = &AArch64::FPR128RegClass;
8813 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8814 break;
8816 Opc = AArch64::MLAv4i16;
8817 RC = &AArch64::FPR64RegClass;
8818 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8819 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
8820 RC);
8821 break;
8823 Opc = AArch64::MLSv4i16;
8824 RC = &AArch64::FPR64RegClass;
8825 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8826 break;
8828 Opc = AArch64::MLAv8i16;
8829 RC = &AArch64::FPR128RegClass;
8830 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8831 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
8832 RC);
8833 break;
8835 Opc = AArch64::MLSv8i16;
8836 RC = &AArch64::FPR128RegClass;
8837 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8838 break;
8840 Opc = AArch64::MLAv2i32;
8841 RC = &AArch64::FPR64RegClass;
8842 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8843 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
8844 RC);
8845 break;
8847 Opc = AArch64::MLSv2i32;
8848 RC = &AArch64::FPR64RegClass;
8849 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8850 break;
8852 Opc = AArch64::MLAv4i32;
8853 RC = &AArch64::FPR128RegClass;
8854 MUL = genFusedMultiplyAccNeg(MF, MRI, TII, Root, InsInstrs,
8855 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
8856 RC);
8857 break;
8859 Opc = AArch64::MLSv4i32;
8860 RC = &AArch64::FPR128RegClass;
8861 MUL = genFusedMultiplyAcc(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8862 break;
8863
8865 Opc = AArch64::MLAv4i16_indexed;
8866 RC = &AArch64::FPR64RegClass;
8867 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8868 break;
8870 Opc = AArch64::MLAv4i16_indexed;
8871 RC = &AArch64::FPR64RegClass;
8872 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8873 break;
8875 Opc = AArch64::MLAv8i16_indexed;
8876 RC = &AArch64::FPR128RegClass;
8877 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8878 break;
8880 Opc = AArch64::MLAv8i16_indexed;
8881 RC = &AArch64::FPR128RegClass;
8882 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8883 break;
8885 Opc = AArch64::MLAv2i32_indexed;
8886 RC = &AArch64::FPR64RegClass;
8887 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8888 break;
8890 Opc = AArch64::MLAv2i32_indexed;
8891 RC = &AArch64::FPR64RegClass;
8892 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8893 break;
8895 Opc = AArch64::MLAv4i32_indexed;
8896 RC = &AArch64::FPR128RegClass;
8897 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8898 break;
8900 Opc = AArch64::MLAv4i32_indexed;
8901 RC = &AArch64::FPR128RegClass;
8902 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8903 break;
8904
8906 Opc = AArch64::MLAv4i16_indexed;
8907 RC = &AArch64::FPR64RegClass;
8908 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8909 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i16,
8910 RC);
8911 break;
8913 Opc = AArch64::MLSv4i16_indexed;
8914 RC = &AArch64::FPR64RegClass;
8915 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8916 break;
8918 Opc = AArch64::MLAv8i16_indexed;
8919 RC = &AArch64::FPR128RegClass;
8920 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8921 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv8i16,
8922 RC);
8923 break;
8925 Opc = AArch64::MLSv8i16_indexed;
8926 RC = &AArch64::FPR128RegClass;
8927 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8928 break;
8930 Opc = AArch64::MLAv2i32_indexed;
8931 RC = &AArch64::FPR64RegClass;
8932 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8933 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv2i32,
8934 RC);
8935 break;
8937 Opc = AArch64::MLSv2i32_indexed;
8938 RC = &AArch64::FPR64RegClass;
8939 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8940 break;
8942 Opc = AArch64::MLAv4i32_indexed;
8943 RC = &AArch64::FPR128RegClass;
8944 MUL = genFusedMultiplyIdxNeg(MF, MRI, TII, Root, InsInstrs,
8945 InstrIdxForVirtReg, 1, Opc, AArch64::NEGv4i32,
8946 RC);
8947 break;
8949 Opc = AArch64::MLSv4i32_indexed;
8950 RC = &AArch64::FPR128RegClass;
8951 MUL = genFusedMultiplyIdx(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8952 break;
8953
8954 // Floating Point Support
8956 Opc = AArch64::FMADDHrrr;
8957 RC = &AArch64::FPR16RegClass;
8958 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8959 break;
8961 Opc = AArch64::FMADDSrrr;
8962 RC = &AArch64::FPR32RegClass;
8963 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8964 break;
8966 Opc = AArch64::FMADDDrrr;
8967 RC = &AArch64::FPR64RegClass;
8968 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
8969 break;
8970
8972 Opc = AArch64::FMADDHrrr;
8973 RC = &AArch64::FPR16RegClass;
8974 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8975 break;
8977 Opc = AArch64::FMADDSrrr;
8978 RC = &AArch64::FPR32RegClass;
8979 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8980 break;
8982 Opc = AArch64::FMADDDrrr;
8983 RC = &AArch64::FPR64RegClass;
8984 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
8985 break;
8986
8988 Opc = AArch64::FMLAv1i32_indexed;
8989 RC = &AArch64::FPR32RegClass;
8990 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
8992 break;
8994 Opc = AArch64::FMLAv1i32_indexed;
8995 RC = &AArch64::FPR32RegClass;
8996 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
8998 break;
8999
9001 Opc = AArch64::FMLAv1i64_indexed;
9002 RC = &AArch64::FPR64RegClass;
9003 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9005 break;
9007 Opc = AArch64::FMLAv1i64_indexed;
9008 RC = &AArch64::FPR64RegClass;
9009 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9011 break;
9012
9014 RC = &AArch64::FPR64RegClass;
9015 Opc = AArch64::FMLAv4i16_indexed;
9016 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9018 break;
9020 RC = &AArch64::FPR64RegClass;
9021 Opc = AArch64::FMLAv4f16;
9022 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9024 break;
9026 RC = &AArch64::FPR64RegClass;
9027 Opc = AArch64::FMLAv4i16_indexed;
9028 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9030 break;
9032 RC = &AArch64::FPR64RegClass;
9033 Opc = AArch64::FMLAv4f16;
9034 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9036 break;
9037
9040 RC = &AArch64::FPR64RegClass;
9042 Opc = AArch64::FMLAv2i32_indexed;
9043 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9045 } else {
9046 Opc = AArch64::FMLAv2f32;
9047 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9049 }
9050 break;
9053 RC = &AArch64::FPR64RegClass;
9055 Opc = AArch64::FMLAv2i32_indexed;
9056 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9058 } else {
9059 Opc = AArch64::FMLAv2f32;
9060 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9062 }
9063 break;
9064
9066 RC = &AArch64::FPR128RegClass;
9067 Opc = AArch64::FMLAv8i16_indexed;
9068 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9070 break;
9072 RC = &AArch64::FPR128RegClass;
9073 Opc = AArch64::FMLAv8f16;
9074 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9076 break;
9078 RC = &AArch64::FPR128RegClass;
9079 Opc = AArch64::FMLAv8i16_indexed;
9080 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9082 break;
9084 RC = &AArch64::FPR128RegClass;
9085 Opc = AArch64::FMLAv8f16;
9086 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9088 break;
9089
9092 RC = &AArch64::FPR128RegClass;
9094 Opc = AArch64::FMLAv2i64_indexed;
9095 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9097 } else {
9098 Opc = AArch64::FMLAv2f64;
9099 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9101 }
9102 break;
9105 RC = &AArch64::FPR128RegClass;
9107 Opc = AArch64::FMLAv2i64_indexed;
9108 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9110 } else {
9111 Opc = AArch64::FMLAv2f64;
9112 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9114 }
9115 break;
9116
9119 RC = &AArch64::FPR128RegClass;
9121 Opc = AArch64::FMLAv4i32_indexed;
9122 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9124 } else {
9125 Opc = AArch64::FMLAv4f32;
9126 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9128 }
9129 break;
9130
9133 RC = &AArch64::FPR128RegClass;
9135 Opc = AArch64::FMLAv4i32_indexed;
9136 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9138 } else {
9139 Opc = AArch64::FMLAv4f32;
9140 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9142 }
9143 break;
9144
9146 Opc = AArch64::FNMSUBHrrr;
9147 RC = &AArch64::FPR16RegClass;
9148 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9149 break;
9151 Opc = AArch64::FNMSUBSrrr;
9152 RC = &AArch64::FPR32RegClass;
9153 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9154 break;
9156 Opc = AArch64::FNMSUBDrrr;
9157 RC = &AArch64::FPR64RegClass;
9158 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9159 break;
9160
9162 Opc = AArch64::FNMADDHrrr;
9163 RC = &AArch64::FPR16RegClass;
9164 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9165 break;
9167 Opc = AArch64::FNMADDSrrr;
9168 RC = &AArch64::FPR32RegClass;
9169 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9170 break;
9172 Opc = AArch64::FNMADDDrrr;
9173 RC = &AArch64::FPR64RegClass;
9174 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
9175 break;
9176
9178 Opc = AArch64::FMSUBHrrr;
9179 RC = &AArch64::FPR16RegClass;
9180 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9181 break;
9183 Opc = AArch64::FMSUBSrrr;
9184 RC = &AArch64::FPR32RegClass;
9185 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9186 break;
9188 Opc = AArch64::FMSUBDrrr;
9189 RC = &AArch64::FPR64RegClass;
9190 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
9191 break;
9192
9194 Opc = AArch64::FMLSv1i32_indexed;
9195 RC = &AArch64::FPR32RegClass;
9196 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9198 break;
9199
9201 Opc = AArch64::FMLSv1i64_indexed;
9202 RC = &AArch64::FPR64RegClass;
9203 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9205 break;
9206
9209 RC = &AArch64::FPR64RegClass;
9210 Register NewVR = MRI.createVirtualRegister(RC);
9211 MachineInstrBuilder MIB1 =
9212 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f16), NewVR)
9213 .add(Root.getOperand(2));
9214 InsInstrs.push_back(MIB1);
9215 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9217 Opc = AArch64::FMLAv4f16;
9218 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9219 FMAInstKind::Accumulator, &NewVR);
9220 } else {
9221 Opc = AArch64::FMLAv4i16_indexed;
9222 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9223 FMAInstKind::Indexed, &NewVR);
9224 }
9225 break;
9226 }
9228 RC = &AArch64::FPR64RegClass;
9229 Opc = AArch64::FMLSv4f16;
9230 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9232 break;
9234 RC = &AArch64::FPR64RegClass;
9235 Opc = AArch64::FMLSv4i16_indexed;
9236 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9238 break;
9239
9242 RC = &AArch64::FPR64RegClass;
9244 Opc = AArch64::FMLSv2i32_indexed;
9245 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9247 } else {
9248 Opc = AArch64::FMLSv2f32;
9249 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9251 }
9252 break;
9253
9256 RC = &AArch64::FPR128RegClass;
9257 Register NewVR = MRI.createVirtualRegister(RC);
9258 MachineInstrBuilder MIB1 =
9259 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv8f16), NewVR)
9260 .add(Root.getOperand(2));
9261 InsInstrs.push_back(MIB1);
9262 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9264 Opc = AArch64::FMLAv8f16;
9265 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9266 FMAInstKind::Accumulator, &NewVR);
9267 } else {
9268 Opc = AArch64::FMLAv8i16_indexed;
9269 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9270 FMAInstKind::Indexed, &NewVR);
9271 }
9272 break;
9273 }
9275 RC = &AArch64::FPR128RegClass;
9276 Opc = AArch64::FMLSv8f16;
9277 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9279 break;
9281 RC = &AArch64::FPR128RegClass;
9282 Opc = AArch64::FMLSv8i16_indexed;
9283 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9285 break;
9286
9289 RC = &AArch64::FPR128RegClass;
9291 Opc = AArch64::FMLSv2i64_indexed;
9292 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9294 } else {
9295 Opc = AArch64::FMLSv2f64;
9296 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9298 }
9299 break;
9300
9303 RC = &AArch64::FPR128RegClass;
9305 Opc = AArch64::FMLSv4i32_indexed;
9306 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9308 } else {
9309 Opc = AArch64::FMLSv4f32;
9310 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
9312 }
9313 break;
9316 RC = &AArch64::FPR64RegClass;
9317 Register NewVR = MRI.createVirtualRegister(RC);
9318 MachineInstrBuilder MIB1 =
9319 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f32), NewVR)
9320 .add(Root.getOperand(2));
9321 InsInstrs.push_back(MIB1);
9322 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9324 Opc = AArch64::FMLAv2i32_indexed;
9325 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9326 FMAInstKind::Indexed, &NewVR);
9327 } else {
9328 Opc = AArch64::FMLAv2f32;
9329 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9330 FMAInstKind::Accumulator, &NewVR);
9331 }
9332 break;
9333 }
9336 RC = &AArch64::FPR128RegClass;
9337 Register NewVR = MRI.createVirtualRegister(RC);
9338 MachineInstrBuilder MIB1 =
9339 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv4f32), NewVR)
9340 .add(Root.getOperand(2));
9341 InsInstrs.push_back(MIB1);
9342 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9344 Opc = AArch64::FMLAv4i32_indexed;
9345 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9346 FMAInstKind::Indexed, &NewVR);
9347 } else {
9348 Opc = AArch64::FMLAv4f32;
9349 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9350 FMAInstKind::Accumulator, &NewVR);
9351 }
9352 break;
9353 }
9356 RC = &AArch64::FPR128RegClass;
9357 Register NewVR = MRI.createVirtualRegister(RC);
9358 MachineInstrBuilder MIB1 =
9359 BuildMI(MF, MIMetadata(Root), TII->get(AArch64::FNEGv2f64), NewVR)
9360 .add(Root.getOperand(2));
9361 InsInstrs.push_back(MIB1);
9362 InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
9364 Opc = AArch64::FMLAv2i64_indexed;
9365 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9366 FMAInstKind::Indexed, &NewVR);
9367 } else {
9368 Opc = AArch64::FMLAv2f64;
9369 MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
9370 FMAInstKind::Accumulator, &NewVR);
9371 }
9372 break;
9373 }
9376 unsigned IdxDupOp =
9378 : 2;
9379 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i32_indexed,
9380 &AArch64::FPR128RegClass, MRI);
9381 break;
9382 }
9385 unsigned IdxDupOp =
9387 : 2;
9388 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv2i64_indexed,
9389 &AArch64::FPR128RegClass, MRI);
9390 break;
9391 }
9394 unsigned IdxDupOp =
9396 : 2;
9397 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i16_indexed,
9398 &AArch64::FPR128_loRegClass, MRI);
9399 break;
9400 }
9403 unsigned IdxDupOp =
9405 : 2;
9406 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv4i32_indexed,
9407 &AArch64::FPR128RegClass, MRI);
9408 break;
9409 }
9412 unsigned IdxDupOp =
9414 : 2;
9415 genIndexedMultiply(Root, InsInstrs, IdxDupOp, AArch64::FMULv8i16_indexed,
9416 &AArch64::FPR128_loRegClass, MRI);
9417 break;
9418 }
9420 MUL = genFNegatedMAD(MF, MRI, TII, Root, InsInstrs);
9421 break;
9422 }
9424 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9425 Pattern, 4);
9426 break;
9427 }
9429 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9430 Pattern, 8);
9431 break;
9432 }
9434 generateGatherLanePattern(Root, InsInstrs, DelInstrs, InstrIdxForVirtReg,
9435 Pattern, 16);
9436 break;
9437 }
9438
9439 } // end switch (Pattern)
9440 // Record MUL and ADD/SUB for deletion
9441 if (MUL)
9442 DelInstrs.push_back(MUL);
9443 DelInstrs.push_back(&Root);
9444
9445 // Set the flags on the inserted instructions to be the merged flags of the
9446 // instructions that we have combined.
9447 uint32_t Flags = Root.getFlags();
9448 if (MUL)
9449 Flags = Root.mergeFlagsWith(*MUL);
9450 for (auto *MI : InsInstrs)
9451 MI->setFlags(Flags);
9452}
9453
9454/// Replace csincr-branch sequence by simple conditional branch
9455///
9456/// Examples:
9457/// 1. \code
9458/// csinc w9, wzr, wzr, <condition code>
9459/// tbnz w9, #0, 0x44
9460/// \endcode
9461/// to
9462/// \code
9463/// b.<inverted condition code>
9464/// \endcode
9465///
9466/// 2. \code
9467/// csinc w9, wzr, wzr, <condition code>
9468/// tbz w9, #0, 0x44
9469/// \endcode
9470/// to
9471/// \code
9472/// b.<condition code>
9473/// \endcode
9474///
9475/// Replace compare and branch sequence by TBZ/TBNZ instruction when the
9476/// compare's constant operand is power of 2.
9477///
9478/// Examples:
9479/// \code
9480/// and w8, w8, #0x400
9481/// cbnz w8, L1
9482/// \endcode
9483/// to
9484/// \code
9485/// tbnz w8, #10, L1
9486/// \endcode
9487///
9488/// \param MI Conditional Branch
9489/// \return True when the simple conditional branch is generated
9490///
9491bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
9492 bool IsNegativeBranch = false;
9493 bool IsTestAndBranch = false;
9494 unsigned TargetBBInMI = 0;
9495 switch (MI.getOpcode()) {
9496 default:
9497 llvm_unreachable("Unknown branch instruction?");
9498 case AArch64::Bcc:
9499 case AArch64::CBWPri:
9500 case AArch64::CBXPri:
9501 case AArch64::CBBAssertExt:
9502 case AArch64::CBHAssertExt:
9503 case AArch64::CBWPrr:
9504 case AArch64::CBXPrr:
9505 return false;
9506 case AArch64::CBZW:
9507 case AArch64::CBZX:
9508 TargetBBInMI = 1;
9509 break;
9510 case AArch64::CBNZW:
9511 case AArch64::CBNZX:
9512 TargetBBInMI = 1;
9513 IsNegativeBranch = true;
9514 break;
9515 case AArch64::TBZW:
9516 case AArch64::TBZX:
9517 TargetBBInMI = 2;
9518 IsTestAndBranch = true;
9519 break;
9520 case AArch64::TBNZW:
9521 case AArch64::TBNZX:
9522 TargetBBInMI = 2;
9523 IsNegativeBranch = true;
9524 IsTestAndBranch = true;
9525 break;
9526 }
9527 // So we increment a zero register and test for bits other
9528 // than bit 0? Conservatively bail out in case the verifier
9529 // missed this case.
9530 if (IsTestAndBranch && MI.getOperand(1).getImm())
9531 return false;
9532
9533 // Find Definition.
9534 assert(MI.getParent() && "Incomplete machine instruction\n");
9535 MachineBasicBlock *MBB = MI.getParent();
9536 MachineFunction *MF = MBB->getParent();
9537 MachineRegisterInfo *MRI = &MF->getRegInfo();
9538 Register VReg = MI.getOperand(0).getReg();
9539 if (!VReg.isVirtual())
9540 return false;
9541
9542 MachineInstr *DefMI = MRI->getVRegDef(VReg);
9543
9544 // Look through COPY instructions to find definition.
9545 while (DefMI->isCopy()) {
9546 Register CopyVReg = DefMI->getOperand(1).getReg();
9547 if (!MRI->hasOneNonDBGUse(CopyVReg))
9548 return false;
9549 if (!MRI->hasOneDef(CopyVReg))
9550 return false;
9551 DefMI = MRI->getVRegDef(CopyVReg);
9552 }
9553
9554 switch (DefMI->getOpcode()) {
9555 default:
9556 return false;
9557 // Fold AND into a TBZ/TBNZ if constant operand is power of 2.
9558 case AArch64::ANDWri:
9559 case AArch64::ANDXri: {
9560 if (IsTestAndBranch)
9561 return false;
9562 if (DefMI->getParent() != MBB)
9563 return false;
9564 if (!MRI->hasOneNonDBGUse(VReg))
9565 return false;
9566
9567 bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
9568 uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
9569 DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
9570 if (!isPowerOf2_64(Mask))
9571 return false;
9572
9573 MachineOperand &MO = DefMI->getOperand(1);
9574 Register NewReg = MO.getReg();
9575 if (!NewReg.isVirtual())
9576 return false;
9577
9578 assert(!MRI->def_empty(NewReg) && "Register must be defined.");
9579
9580 MachineBasicBlock &RefToMBB = *MBB;
9581 MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
9582 DebugLoc DL = MI.getDebugLoc();
9583 unsigned Imm = Log2_64(Mask);
9584 unsigned Opc = (Imm < 32)
9585 ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
9586 : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
9587 MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
9588 .addReg(NewReg)
9589 .addImm(Imm)
9590 .addMBB(TBB);
9591 // Register lives on to the CBZ now.
9592 MO.setIsKill(false);
9593
9594 // For immediates smaller than 32, we need to use the 32-bit
9595 // variant (W) in all cases, since the 64-bit variant cannot
9596 // encode them.
9597 // Therefore, if the input register is 64-bit, we need to take the
9598 // 32-bit sub-part.
9599 if (!Is32Bit && Imm < 32)
9600 NewMI->getOperand(0).setSubReg(AArch64::sub_32);
9601 MI.eraseFromParent();
9602 return true;
9603 }
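  // For example, with "and w8, w8, #0x400" feeding a CBNZ, the decoded mask
  // 0x400 is a power of two and Log2_64(0x400) == 10, so the pair collapses
  // to "tbnz w8, #10, L1", matching the example in the header comment. If the
  // AND is 64-bit but the tested bit index is below 32, the W-form TB(N)Z is
  // emitted on the sub_32 sub-register, as handled above.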
9604 // Look for CSINC
9605 case AArch64::CSINCWr:
9606 case AArch64::CSINCXr: {
9607 if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
9608 DefMI->getOperand(2).getReg() == AArch64::WZR) &&
9609 !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
9610 DefMI->getOperand(2).getReg() == AArch64::XZR))
9611 return false;
9612
9613 if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, /*TRI=*/nullptr,
9614 true) != -1)
9615 return false;
9616
9617 AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
9618 // Convert only when the condition code is not modified between
9619 // the CSINC and the branch. The CC may be used by other
9620 // instructions in between.
9622 return false;
9623 MachineBasicBlock &RefToMBB = *MBB;
9624 MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
9625 DebugLoc DL = MI.getDebugLoc();
9626 if (IsNegativeBranch)
9627 CC = AArch64CC::getInvertedCondCode(CC);
9628 BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
9629 MI.eraseFromParent();
9630 return true;
9631 }
9632 }
9633}
9634
9635std::pair<unsigned, unsigned>
9636AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
9637 const unsigned Mask = AArch64II::MO_FRAGMENT;
9638 return std::make_pair(TF & Mask, TF & ~Mask);
9639}
9640
9642AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
9643 using namespace AArch64II;
9644
9645 static const std::pair<unsigned, const char *> TargetFlags[] = {
9646 {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
9647 {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
9648 {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
9649 {MO_HI12, "aarch64-hi12"}};
9650 return ArrayRef(TargetFlags);
9651}
9652
9654AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
9655 using namespace AArch64II;
9656
9657 static const std::pair<unsigned, const char *> TargetFlags[] = {
9658 {MO_COFFSTUB, "aarch64-coffstub"},
9659 {MO_GOT, "aarch64-got"},
9660 {MO_NC, "aarch64-nc"},
9661 {MO_S, "aarch64-s"},
9662 {MO_TLS, "aarch64-tls"},
9663 {MO_DLLIMPORT, "aarch64-dllimport"},
9664 {MO_PREL, "aarch64-prel"},
9665 {MO_TAGGED, "aarch64-tagged"},
9666 {MO_ARM64EC_CALLMANGLE, "aarch64-arm64ec-callmangle"},
9667 };
9668 return ArrayRef(TargetFlags);
9669}
9670
9672AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
9673 static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
9674 {{MOSuppressPair, "aarch64-suppress-pair"},
9675 {MOStridedAccess, "aarch64-strided-access"}};
9676 return ArrayRef(TargetFlags);
9677}
9678
9679/// Constants defining how certain sequences should be outlined.
9680/// This encompasses how an outlined function should be called, and what kind of
9681/// frame should be emitted for that outlined function.
9682///
9683/// \p MachineOutlinerDefault implies that the function should be called with
9684/// a save and restore of LR to the stack.
9685///
9686/// That is,
9687///
9688/// I1 Save LR OUTLINED_FUNCTION:
9689/// I2 --> BL OUTLINED_FUNCTION I1
9690/// I3 Restore LR I2
9691/// I3
9692/// RET
9693///
9694/// * Call construction overhead: 3 (save + BL + restore)
9695/// * Frame construction overhead: 1 (ret)
9696/// * Requires stack fixups? Yes
9697///
9698/// \p MachineOutlinerTailCall implies that the function is being created from
9699/// a sequence of instructions ending in a return.
9700///
9701/// That is,
9702///
9703/// I1 OUTLINED_FUNCTION:
9704/// I2 --> B OUTLINED_FUNCTION I1
9705/// RET I2
9706/// RET
9707///
9708/// * Call construction overhead: 1 (B)
9709/// * Frame construction overhead: 0 (Return included in sequence)
9710/// * Requires stack fixups? No
9711///
9712/// \p MachineOutlinerNoLRSave implies that the function should be called using
9713/// a BL instruction, but doesn't require LR to be saved and restored. This
9714/// happens when LR is known to be dead.
9715///
9716/// That is,
9717///
9718/// I1 OUTLINED_FUNCTION:
9719/// I2 --> BL OUTLINED_FUNCTION I1
9720/// I3 I2
9721/// I3
9722/// RET
9723///
9724/// * Call construction overhead: 1 (BL)
9725/// * Frame construction overhead: 1 (RET)
9726/// * Requires stack fixups? No
9727///
9728/// \p MachineOutlinerThunk implies that the function is being created from
9729/// a sequence of instructions ending in a call. The outlined function is
9730/// called with a BL instruction, and the outlined function tail-calls the
9731/// original call destination.
9732///
9733/// That is,
9734///
9735/// I1 OUTLINED_FUNCTION:
9736/// I2 --> BL OUTLINED_FUNCTION I1
9737/// BL f I2
9738/// B f
9739/// * Call construction overhead: 1 (BL)
9740/// * Frame construction overhead: 0
9741/// * Requires stack fixups? No
9742///
9743/// \p MachineOutlinerRegSave implies that the function should be called with a
9744/// save and restore of LR to an available register. This allows us to avoid
9745/// stack fixups. Note that this outlining variant is compatible with the
9746/// NoLRSave case.
9747///
9748/// That is,
9749///
9750/// I1 Save LR OUTLINED_FUNCTION:
9751/// I2 --> BL OUTLINED_FUNCTION I1
9752/// I3 Restore LR I2
9753/// I3
9754/// RET
9755///
9756/// * Call construction overhead: 3 (save + BL + restore)
9757/// * Frame construction overhead: 1 (ret)
9758/// * Requires stack fixups? No
9760 MachineOutlinerDefault, /// Emit a save, restore, call, and return.
9761 MachineOutlinerTailCall, /// Only emit a branch.
9762 MachineOutlinerNoLRSave, /// Emit a call and return.
9763 MachineOutlinerThunk, /// Emit a call and tail-call.
9764 MachineOutlinerRegSave /// Same as default, but save to a register.
9765};
9766
9772
9774AArch64InstrInfo::findRegisterToSaveLRTo(outliner::Candidate &C) const {
9775 MachineFunction *MF = C.getMF();
9776 const TargetRegisterInfo &TRI = *MF->getSubtarget().getRegisterInfo();
9777 const AArch64RegisterInfo *ARI =
9778 static_cast<const AArch64RegisterInfo *>(&TRI);
9779 // Check if there is an available register across the sequence that we can
9780 // use.
9781 for (unsigned Reg : AArch64::GPR64RegClass) {
9782 if (!ARI->isReservedReg(*MF, Reg) &&
9783 Reg != AArch64::LR && // LR is not reserved, but don't use it.
9784 Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
9785 Reg != AArch64::X17 && // Ditto for X17.
9786 C.isAvailableAcrossAndOutOfSeq(Reg, TRI) &&
9787 C.isAvailableInsideSeq(Reg, TRI))
9788 return Reg;
9789 }
9790 return Register();
9791}
9792
9793static bool
9795 const outliner::Candidate &b) {
9796 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
9797 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
9798
9799 return MFIa->getSignReturnAddressCondition() ==
9801}
9802
9803static bool
9805 const outliner::Candidate &b) {
9806 const auto &MFIa = a.getMF()->getInfo<AArch64FunctionInfo>();
9807 const auto &MFIb = b.getMF()->getInfo<AArch64FunctionInfo>();
9808
9809 return MFIa->shouldSignWithBKey() == MFIb->shouldSignWithBKey();
9810}
9811
9813 const outliner::Candidate &b) {
9814 const AArch64Subtarget &SubtargetA =
9816 const AArch64Subtarget &SubtargetB =
9817 b.getMF()->getSubtarget<AArch64Subtarget>();
9818 return SubtargetA.hasV8_3aOps() == SubtargetB.hasV8_3aOps();
9819}
9820
9821std::optional<std::unique_ptr<outliner::OutlinedFunction>>
9822AArch64InstrInfo::getOutliningCandidateInfo(
9823 const MachineModuleInfo &MMI,
9824 std::vector<outliner::Candidate> &RepeatedSequenceLocs,
9825 unsigned MinRepeats) const {
9826 unsigned SequenceSize = 0;
9827 for (auto &MI : RepeatedSequenceLocs[0])
9828 SequenceSize += getInstSizeInBytes(MI);
9829
9830 unsigned NumBytesToCreateFrame = 0;
9831
9832 // Avoid splitting an ADRP and its paired ADD/LDR into outlined functions.
9833 // These instructions are fused together by the scheduler.
9834 // Any candidate where ADRP is the last instruction should be rejected,
9835 // as that would split the ADRP pair.
9836 MachineInstr &LastMI = RepeatedSequenceLocs[0].back();
9837 MachineInstr &FirstMI = RepeatedSequenceLocs[0].front();
9838 if (LastMI.getOpcode() == AArch64::ADRP &&
9839 (LastMI.getOperand(1).getTargetFlags() & AArch64II::MO_PAGE) != 0 &&
9840 (LastMI.getOperand(1).getTargetFlags() & AArch64II::MO_GOT) != 0) {
9841 return std::nullopt;
9842 }
9843
9844 // Similarly, any candidate where the first instruction is an ADD/LDR with a
9845 // page offset should be rejected, to avoid splitting the ADRP pair.
9846 if ((FirstMI.getOpcode() == AArch64::ADDXri ||
9847 FirstMI.getOpcode() == AArch64::LDRXui) &&
9848 (FirstMI.getOperand(2).getTargetFlags() & AArch64II::MO_PAGEOFF) != 0 &&
9849 (FirstMI.getOperand(2).getTargetFlags() & AArch64II::MO_GOT) != 0) {
9850 return std::nullopt;
9851 }
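  // For example, a GOT access is materialized as the fused pair
  //   adrp x0, :got:sym
  //   ldr  x0, [x0, :got_lo12:sym]
  // A candidate ending on the ADRP, or starting on the page-offset ADD/LDR,
  // would separate the two halves, so both shapes are rejected above.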
9852
9853 // We only allow outlining for functions having exactly matching return
9854 // address signing attributes, i.e., all share the same value for the
9855 // attribute "sign-return-address" and all share the same type of key they
9856 // are signed with.
9857 // Additionally we require all functions to simultaneously either support
9858 // v8.3a features or not. Otherwise an outlined function could get signed
9859 // using dedicated v8.3 instructions and a call from a function that doesn't
9860 // support v8.3 instructions would therefore be invalid.
9861 if (std::adjacent_find(
9862 RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
9863 [](const outliner::Candidate &a, const outliner::Candidate &b) {
9864 // Return true if a and b are non-equal w.r.t. return address
9865 // signing or support of v8.3a features
9866 if (outliningCandidatesSigningScopeConsensus(a, b) &&
9867 outliningCandidatesSigningKeyConsensus(a, b) &&
9868 outliningCandidatesV8_3OpsConsensus(a, b)) {
9869 return false;
9870 }
9871 return true;
9872 }) != RepeatedSequenceLocs.end()) {
9873 return std::nullopt;
9874 }
9875
9876 // Since at this point all candidates agree on their return address signing
9877 // picking just one is fine. If the candidate functions potentially sign their
9878 // return addresses, the outlined function should do the same. Note that in
9879 // the case of "sign-return-address"="non-leaf" this is an assumption: it is
9880 // not certain that the outlined function will have to sign its return
9881 // address, but that decision is made later, after the decision to outline
9882 // has already been made.
9883 // The same holds for the number of additional instructions we need: On
9884 // v8.3a RET can be replaced by RETAA/RETAB and no AUT instruction is
9885 // necessary. However, at this point we don't know if the outlined function
9886 // will have a RET instruction so we assume the worst.
9887 const TargetRegisterInfo &TRI = getRegisterInfo();
9888 // Performing a tail call may require extra checks when PAuth is enabled.
9889 // If PAuth is disabled, set it to zero for uniformity.
9890 unsigned NumBytesToCheckLRInTCEpilogue = 0;
9891 const auto RASignCondition = RepeatedSequenceLocs[0]
9892 .getMF()
9893 ->getInfo<AArch64FunctionInfo>()
9894 ->getSignReturnAddressCondition();
9895 if (RASignCondition != SignReturnAddress::None) {
9896 // One PAC and one AUT instructions
9897 NumBytesToCreateFrame += 8;
9898
9899 // PAuth is enabled - set extra tail call cost, if any.
9900 auto LRCheckMethod = Subtarget.getAuthenticatedLRCheckMethod(
9901 *RepeatedSequenceLocs[0].getMF());
9902 NumBytesToCheckLRInTCEpilogue =
9903 AArch64PAuth::getCheckerSizeInBytes(LRCheckMethod);
9904 // Checking the authenticated LR value may significantly impact
9905 // SequenceSize, so account for it for more precise results.
9906 if (isTailCallReturnInst(RepeatedSequenceLocs[0].back()))
9907 SequenceSize += NumBytesToCheckLRInTCEpilogue;
9908
9909 // We have to check if sp modifying instructions would get outlined.
9910 // If so we only allow outlining if sp is unchanged overall, so matching
9911 // sub and add instructions are okay to outline, all other sp modifications
9912 // are not
9913 auto hasIllegalSPModification = [&TRI](outliner::Candidate &C) {
9914 int SPValue = 0;
9915 for (auto &MI : C) {
9916 if (MI.modifiesRegister(AArch64::SP, &TRI)) {
9917 switch (MI.getOpcode()) {
9918 case AArch64::ADDXri:
9919 case AArch64::ADDWri:
9920 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
9921 assert(MI.getOperand(2).isImm() &&
9922 "Expected operand to be immediate");
9923 assert(MI.getOperand(1).isReg() &&
9924 "Expected operand to be a register");
9925 // Check if the add just increments sp. If so, we search for
9926 // matching sub instructions that decrement sp. If not, the
9927 // modification is illegal
9928 if (MI.getOperand(1).getReg() == AArch64::SP)
9929 SPValue += MI.getOperand(2).getImm();
9930 else
9931 return true;
9932 break;
9933 case AArch64::SUBXri:
9934 case AArch64::SUBWri:
9935 assert(MI.getNumOperands() == 4 && "Wrong number of operands");
9936 assert(MI.getOperand(2).isImm() &&
9937 "Expected operand to be immediate");
9938 assert(MI.getOperand(1).isReg() &&
9939 "Expected operand to be a register");
9940 // Check if the sub just decrements sp. If so, we search for
9941 // matching add instructions that increment sp. If not, the
9942 // modification is illegal
9943 if (MI.getOperand(1).getReg() == AArch64::SP)
9944 SPValue -= MI.getOperand(2).getImm();
9945 else
9946 return true;
9947 break;
9948 default:
9949 return true;
9950 }
9951 }
9952 }
9953 if (SPValue)
9954 return true;
9955 return false;
9956 };
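  // For instance, a candidate containing the balanced pair
  //   sub sp, sp, #16
  //   ...
  //   add sp, sp, #16
  // leaves SPValue at zero and is kept, whereas an unmatched "sub sp, sp, #32"
  // or any other kind of SP write causes the candidate to be dropped just
  // below.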
9957 // Remove candidates with illegal stack modifying instructions
9958 llvm::erase_if(RepeatedSequenceLocs, hasIllegalSPModification);
9959
9960 // If the sequence doesn't have enough candidates left, then we're done.
9961 if (RepeatedSequenceLocs.size() < MinRepeats)
9962 return std::nullopt;
9963 }
9964
9965 // Properties about candidate MBBs that hold for all of them.
9966 unsigned FlagsSetInAll = 0xF;
9967
9968 // Compute liveness information for each candidate, and set FlagsSetInAll.
9969 for (outliner::Candidate &C : RepeatedSequenceLocs)
9970 FlagsSetInAll &= C.Flags;
9971
9972 unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back().getOpcode();
9973
9974 // Helper lambda which sets call information for every candidate.
9975 auto SetCandidateCallInfo =
9976 [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
9977 for (outliner::Candidate &C : RepeatedSequenceLocs)
9978 C.setCallInfo(CallID, NumBytesForCall);
9979 };
9980
9981 unsigned FrameID = MachineOutlinerDefault;
9982 NumBytesToCreateFrame += 4;
9983
9984 bool HasBTI = any_of(RepeatedSequenceLocs, [](outliner::Candidate &C) {
9985 return C.getMF()->getInfo<AArch64FunctionInfo>()->branchTargetEnforcement();
9986 });
9987
9988 // We check to see if CFI Instructions are present, and if they are
9989 // we find the number of CFI Instructions in the candidates.
9990 unsigned CFICount = 0;
9991 for (auto &I : RepeatedSequenceLocs[0]) {
9992 if (I.isCFIInstruction())
9993 CFICount++;
9994 }
9995
9996 // We compare the number of found CFI Instructions to the number of CFI
9997 // instructions in the parent function for each candidate. We must check this
9998 // since if we outline one of the CFI instructions in a function, we have to
9999 // outline them all for correctness. If we do not, the address offsets will be
10000 // incorrect between the two sections of the program.
10001 for (outliner::Candidate &C : RepeatedSequenceLocs) {
10002 std::vector<MCCFIInstruction> CFIInstructions =
10003 C.getMF()->getFrameInstructions();
10004
10005 if (CFICount > 0 && CFICount != CFIInstructions.size())
10006 return std::nullopt;
10007 }
10008
10009 // Returns true if an instruction is safe to fix up, false otherwise.
10010 auto IsSafeToFixup = [this, &TRI](MachineInstr &MI) {
10011 if (MI.isCall())
10012 return true;
10013
10014 if (!MI.modifiesRegister(AArch64::SP, &TRI) &&
10015 !MI.readsRegister(AArch64::SP, &TRI))
10016 return true;
10017
10018 // Any modification of SP will break our code to save/restore LR.
10019 // FIXME: We could handle some instructions which add a constant
10020 // offset to SP, with a bit more work.
10021 if (MI.modifiesRegister(AArch64::SP, &TRI))
10022 return false;
10023
10024 // At this point, we have a stack instruction that we might need to
10025 // fix up. We'll handle it if it's a load or store.
10026 if (MI.mayLoadOrStore()) {
10027 const MachineOperand *Base; // Filled with the base operand of MI.
10028 int64_t Offset; // Filled with the offset of MI.
10029 bool OffsetIsScalable;
10030
10031 // Does it allow us to offset the base operand and is the base the
10032 // register SP?
10033 if (!getMemOperandWithOffset(MI, Base, Offset, OffsetIsScalable, &TRI) ||
10034 !Base->isReg() || Base->getReg() != AArch64::SP)
10035 return false;
10036
10037 // Fix-up code below assumes bytes.
10038 if (OffsetIsScalable)
10039 return false;
10040
10041 // Find the minimum/maximum offset for this instruction and check
10042 // if fixing it up would be in range.
10043 int64_t MinOffset,
10044 MaxOffset; // Unscaled offsets for the instruction.
10045 // The scale to multiply the offsets by.
10046 TypeSize Scale(0U, false), DummyWidth(0U, false);
10047 getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
10048
10049 Offset += 16; // Update the offset to what it would be if we outlined.
10050 if (Offset < MinOffset * (int64_t)Scale.getFixedValue() ||
10051 Offset > MaxOffset * (int64_t)Scale.getFixedValue())
10052 return false;
10053
10054 // It's in range, so we can outline it.
10055 return true;
10056 }
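  // For example, an LDRXui from [sp, #8] would become a load from [sp, #24]
  // once LR has been spilled by the outlined frame; with a scale of 8 and an
  // unscaled immediate range of [0, 4095] that is still encodable, whereas an
  // access already at byte offset 32760 would be pushed out of range and
  // rejected by the check above.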
10057
10058 // FIXME: Add handling for instructions like "add x0, sp, #8".
10059
10060 // We can't fix it up, so don't outline it.
10061 return false;
10062 };
10063
10064 // True if it's possible to fix up each stack instruction in this sequence.
10065 // Important for frames/call variants that modify the stack.
10066 bool AllStackInstrsSafe =
10067 llvm::all_of(RepeatedSequenceLocs[0], IsSafeToFixup);
10068
10069 // If the last instruction in any candidate is a terminator, then we should
10070 // tail call all of the candidates.
10071 if (RepeatedSequenceLocs[0].back().isTerminator()) {
10072 FrameID = MachineOutlinerTailCall;
10073 NumBytesToCreateFrame = 0;
10074 unsigned NumBytesForCall = 4 + NumBytesToCheckLRInTCEpilogue;
10075 SetCandidateCallInfo(MachineOutlinerTailCall, NumBytesForCall);
10076 }
10077
10078 else if (LastInstrOpcode == AArch64::BL ||
10079 ((LastInstrOpcode == AArch64::BLR ||
10080 LastInstrOpcode == AArch64::BLRNoIP) &&
10081 !HasBTI)) {
10082 // FIXME: Do we need to check if the code after this uses the value of LR?
10083 FrameID = MachineOutlinerThunk;
10084 NumBytesToCreateFrame = NumBytesToCheckLRInTCEpilogue;
10085 SetCandidateCallInfo(MachineOutlinerThunk, 4);
10086 }
10087
10088 else {
10089 // We need to decide how to emit calls + frames. We can always emit the same
10090 // frame if we don't need to save to the stack. If we have to save to the
10091 // stack, then we need a different frame.
10092 unsigned NumBytesNoStackCalls = 0;
10093 std::vector<outliner::Candidate> CandidatesWithoutStackFixups;
10094
10095 // Check if we have to save LR.
10096 for (outliner::Candidate &C : RepeatedSequenceLocs) {
10097 bool LRAvailable =
10099 ? C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI)
10100 : true;
10101 // If we have a noreturn caller, then we're going to be conservative and
10102 // say that we have to save LR. If we don't have a ret at the end of the
10103 // block, then we can't reason about liveness accurately.
10104 //
10105 // FIXME: We can probably do better than always disabling this in
10106 // noreturn functions by fixing up the liveness info.
10107 bool IsNoReturn =
10108 C.getMF()->getFunction().hasFnAttribute(Attribute::NoReturn);
10109
10110 // Is LR available? If so, we don't need a save.
10111 if (LRAvailable && !IsNoReturn) {
10112 NumBytesNoStackCalls += 4;
10113 C.setCallInfo(MachineOutlinerNoLRSave, 4);
10114 CandidatesWithoutStackFixups.push_back(C);
10115 }
10116
10117 // Is an unused register available? If so, we won't modify the stack, so
10118 // we can outline with the same frame type as those that don't save LR.
10119 else if (findRegisterToSaveLRTo(C)) {
10120 NumBytesNoStackCalls += 12;
10121 C.setCallInfo(MachineOutlinerRegSave, 12);
10122 CandidatesWithoutStackFixups.push_back(C);
10123 }
10124
10125 // Is SP used in the sequence at all? If not, we don't have to modify
10126 // the stack, so we are guaranteed to get the same frame.
10127 else if (C.isAvailableInsideSeq(AArch64::SP, TRI)) {
10128 NumBytesNoStackCalls += 12;
10129 C.setCallInfo(MachineOutlinerDefault, 12);
10130 CandidatesWithoutStackFixups.push_back(C);
10131 }
10132
10133 // If we outline this, we need to modify the stack. Pretend we don't
10134 // outline this by saving all of its bytes.
10135 else {
10136 NumBytesNoStackCalls += SequenceSize;
10137 }
10138 }
10139
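  // As an illustration of the accounting above: with three candidates where
  // two can use MachineOutlinerNoLRSave (4 bytes each) and one needs
  // MachineOutlinerRegSave (12 bytes), NumBytesNoStackCalls is 20, which is
  // below 3 * 12 = 36, so the cheaper no-stack-fixup variants are chosen
  // below instead of giving every candidate the default save-to-stack call.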
10140 // If there are no places where we have to save LR, then note that we
10141 // don't have to update the stack. Otherwise, give every candidate the
10142 // default call type, as long as it's safe to do so.
10143 if (!AllStackInstrsSafe ||
10144 NumBytesNoStackCalls <= RepeatedSequenceLocs.size() * 12) {
10145 RepeatedSequenceLocs = CandidatesWithoutStackFixups;
10146 FrameID = MachineOutlinerNoLRSave;
10147 if (RepeatedSequenceLocs.size() < MinRepeats)
10148 return std::nullopt;
10149 } else {
10150 SetCandidateCallInfo(MachineOutlinerDefault, 12);
10151
10152 // Bugzilla ID: 46767
10153 // TODO: Check if fixing up the stack more than once is safe so we can
10154 // outline these.
10155 //
10156 // An outline resulting in a caller that requires stack fixups at the
10157 // callsite to a callee that also requires stack fixups can happen when
10158 // there are no available registers at the candidate callsite for a
10159 // candidate that itself also has calls.
10160 //
10161 // In other words if function_containing_sequence in the following pseudo
10162 // assembly requires that we save LR at the point of the call, but there
10163 // are no available registers: in this case we save using SP and as a
10164 // result the SP offsets require stack fixups by multiples of 16.
10165 //
10166 // function_containing_sequence:
10167 // ...
10168 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
10169 // call OUTLINED_FUNCTION_N
10170 // restore LR from SP
10171 // ...
10172 //
10173 // OUTLINED_FUNCTION_N:
10174 // save LR to SP <- Requires stack instr fixups in OUTLINED_FUNCTION_N
10175 // ...
10176 // bl foo
10177 // restore LR from SP
10178 // ret
10179 //
10180 // Because the code to handle more than one stack fixup does not
10181 // currently have the proper checks for legality, these cases will assert
10182 // in the AArch64 MachineOutliner. This is because the code to do this
10183 // needs more hardening, testing, better checks that generated code is
10184 // legal, etc., and because it is only verified to handle a single pass of
10185 // stack fixup.
10186 //
10187 // The assert happens in AArch64InstrInfo::buildOutlinedFrame to catch
10188 // these cases until they are known to be handled. Bugzilla 46767 is
10189 // referenced in comments at the assert site.
10190 //
10191 // To avoid asserting (or generating non-legal code on noassert builds)
10192 // we remove all candidates which would need more than one stack fixup by
10193 // pruning the cases where the candidate has calls while also having no
10194 // available LR and having no available general purpose registers to copy
10195 // LR to (ie one extra stack save/restore).
10196 //
10197 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
10198 erase_if(RepeatedSequenceLocs, [this, &TRI](outliner::Candidate &C) {
10199 auto IsCall = [](const MachineInstr &MI) { return MI.isCall(); };
10200 return (llvm::any_of(C, IsCall)) &&
10201 (!C.isAvailableAcrossAndOutOfSeq(AArch64::LR, TRI) ||
10202 !findRegisterToSaveLRTo(C));
10203 });
10204 }
10205 }
10206
10207 // If we dropped all of the candidates, bail out here.
10208 if (RepeatedSequenceLocs.size() < MinRepeats)
10209 return std::nullopt;
10210 }
10211
10212 // Does every candidate's MBB contain a call? If so, then we might have a call
10213 // in the range.
10214 if (FlagsSetInAll & MachineOutlinerMBBFlags::HasCalls) {
10215 // Check if the range contains a call. These require a save + restore of the
10216 // link register.
10217 outliner::Candidate &FirstCand = RepeatedSequenceLocs[0];
10218 bool ModStackToSaveLR = false;
10219 if (any_of(drop_end(FirstCand),
10220 [](const MachineInstr &MI) { return MI.isCall(); }))
10221 ModStackToSaveLR = true;
10222
10223 // Handle the last instruction separately. If this is a tail call, then the
10224 // last instruction is a call. We don't want to save + restore in this case.
10225 // However, it is possible that the last instruction is a call without
10226 // it being valid to tail call this sequence, so we consider that case as
10227 // well.
10228 else if (FrameID != MachineOutlinerThunk &&
10229 FrameID != MachineOutlinerTailCall && FirstCand.back().isCall())
10230 ModStackToSaveLR = true;
10231
10232 if (ModStackToSaveLR) {
10233 // We can't fix up the stack. Bail out.
10234 if (!AllStackInstrsSafe)
10235 return std::nullopt;
10236
10237 // Save + restore LR.
10238 NumBytesToCreateFrame += 8;
10239 }
10240 }
10241
10242 // If we have CFI instructions, we can only outline if the outlined section
10243 // can be a tail call
10244 if (FrameID != MachineOutlinerTailCall && CFICount > 0)
10245 return std::nullopt;
10246
10247 return std::make_unique<outliner::OutlinedFunction>(
10248 RepeatedSequenceLocs, SequenceSize, NumBytesToCreateFrame, FrameID);
10249}
10250
10251void AArch64InstrInfo::mergeOutliningCandidateAttributes(
10252 Function &F, std::vector<outliner::Candidate> &Candidates) const {
10253 // If a bunch of candidates reach this point they must agree on their return
10254 // address signing. It is therefore enough to just consider the signing
10255 // behaviour of one of them
10256 const auto &CFn = Candidates.front().getMF()->getFunction();
10257
10258 if (CFn.hasFnAttribute("ptrauth-returns"))
10259 F.addFnAttr(CFn.getFnAttribute("ptrauth-returns"));
10260 if (CFn.hasFnAttribute("ptrauth-auth-traps"))
10261 F.addFnAttr(CFn.getFnAttribute("ptrauth-auth-traps"));
10262 // Since all candidates belong to the same module, just copy the
10263 // function-level attributes of an arbitrary function.
10264 if (CFn.hasFnAttribute("sign-return-address"))
10265 F.addFnAttr(CFn.getFnAttribute("sign-return-address"));
10266 if (CFn.hasFnAttribute("sign-return-address-key"))
10267 F.addFnAttr(CFn.getFnAttribute("sign-return-address-key"));
10268
10269 AArch64GenInstrInfo::mergeOutliningCandidateAttributes(F, Candidates);
10270}
10271
10272bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
10273 MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
10274 const Function &F = MF.getFunction();
10275
10276 // Can F be deduplicated by the linker? If it can, don't outline from it.
10277 if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
10278 return false;
10279
10280 // Don't outline from functions with section markings; the program could
10281 // expect that all the code is in the named section.
10282 // FIXME: Allow outlining from multiple functions with the same section
10283 // marking.
10284 if (F.hasSection())
10285 return false;
10286
10287 // Outlining from functions with redzones is unsafe since the outliner may
10288 // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
10289 // outline from it.
10290 AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
10291 if (!AFI || AFI->hasRedZone().value_or(true))
10292 return false;
10293
10294 // FIXME: Determine whether it is safe to outline from functions which contain
10295 // streaming-mode changes. We may need to ensure any smstart/smstop pairs are
10296 // outlined together and ensure it is safe to outline with async unwind info,
10297 // required for saving & restoring VG around calls.
10298 if (AFI->hasStreamingModeChanges())
10299 return false;
10300
10301 // FIXME: Teach the outliner to generate/handle Windows unwind info.
10303 return false;
10304
10305 // It's safe to outline from MF.
10306 return true;
10307}
10308
10310AArch64InstrInfo::getOutlinableRanges(MachineBasicBlock &MBB,
10311 unsigned &Flags) const {
10313 "Must track liveness!");
10315 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>>
10316 Ranges;
10317 // According to the AArch64 Procedure Call Standard, the following are
10318 // undefined on entry/exit from a function call:
10319 //
10320 // * Registers x16, x17, (and thus w16, w17)
10321 // * Condition codes (and thus the NZCV register)
10322 //
10323 // If any of these registers are used inside or live across an outlined
10324 // function, then they may be modified later, either by the compiler or
10325 // some other tool (like the linker).
10326 //
10327 // To avoid outlining in these situations, partition each block into ranges
10328 // where these registers are dead. We will only outline from those ranges.
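  // For instance, if a block writes x16 and reads it back a few instructions
  // later, the instructions from that def through its last use are excluded,
  // and the stretches before and after it are recorded as separate
  // outlinable ranges by the code below.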
10329 LiveRegUnits LRU(getRegisterInfo());
10330 auto AreAllUnsafeRegsDead = [&LRU]() {
10331 return LRU.available(AArch64::W16) && LRU.available(AArch64::W17) &&
10332 LRU.available(AArch64::NZCV);
10333 };
10334
10335 // We need to know if LR is live across an outlining boundary later on in
10336 // order to decide how we'll create the outlined call, frame, etc.
10337 //
10338 // It's pretty expensive to check this for *every candidate* within a block.
10339 // That's some potentially n^2 behaviour, since in the worst case, we'd need
10340 // to compute liveness from the end of the block for O(n) candidates within
10341 // the block.
10342 //
10343 // So, to improve the average case, let's keep track of liveness from the end
10344 // of the block to the beginning of *every outlinable range*. If we know that
10345 // LR is available in every range we could outline from, then we know that
10346 // we don't need to check liveness for any candidate within that range.
10347 bool LRAvailableEverywhere = true;
10348 // Compute liveness bottom-up.
10349 LRU.addLiveOuts(MBB);
10350 // Update flags that require info about the entire MBB.
10351 auto UpdateWholeMBBFlags = [&Flags](const MachineInstr &MI) {
10352 if (MI.isCall() && !MI.isTerminator())
10354 };
10355 // Range: [RangeBegin, RangeEnd)
10356 MachineBasicBlock::instr_iterator RangeBegin, RangeEnd;
10357 unsigned RangeLen;
10358 auto CreateNewRangeStartingAt =
10359 [&RangeBegin, &RangeEnd,
10360 &RangeLen](MachineBasicBlock::instr_iterator NewBegin) {
10361 RangeBegin = NewBegin;
10362 RangeEnd = std::next(RangeBegin);
10363 RangeLen = 0;
10364 };
10365 auto SaveRangeIfNonEmpty = [&RangeLen, &Ranges, &RangeBegin, &RangeEnd]() {
10366 // At least one unsafe register is not dead. We do not want to outline at
10367 // this point. If it is long enough to outline from and does not cross a
10368 // bundle boundary, save the range [RangeBegin, RangeEnd).
10369 if (RangeLen <= 1)
10370 return;
10371 if (!RangeBegin.isEnd() && RangeBegin->isBundledWithPred())
10372 return;
10373 if (!RangeEnd.isEnd() && RangeEnd->isBundledWithPred())
10374 return;
10375 Ranges.emplace_back(RangeBegin, RangeEnd);
10376 };
10377 // Find the first point where all unsafe registers are dead.
10378 // FIND: <safe instr> <-- end of first potential range
10379 // SKIP: <unsafe def>
10380 // SKIP: ... everything between ...
10381 // SKIP: <unsafe use>
10382 auto FirstPossibleEndPt = MBB.instr_rbegin();
10383 for (; FirstPossibleEndPt != MBB.instr_rend(); ++FirstPossibleEndPt) {
10384 LRU.stepBackward(*FirstPossibleEndPt);
10385 // Update flags that impact how we outline across the entire block,
10386 // regardless of safety.
10387 UpdateWholeMBBFlags(*FirstPossibleEndPt);
10388 if (AreAllUnsafeRegsDead())
10389 break;
10390 }
10391 // If we exhausted the entire block, we have no safe ranges to outline.
10392 if (FirstPossibleEndPt == MBB.instr_rend())
10393 return Ranges;
10394 // Current range.
10395 CreateNewRangeStartingAt(FirstPossibleEndPt->getIterator());
10396 // StartPt points to the first place where all unsafe registers
10397 // are dead (if there is any such point). Begin partitioning the MBB into
10398 // ranges.
10399 for (auto &MI : make_range(FirstPossibleEndPt, MBB.instr_rend())) {
10400 LRU.stepBackward(MI);
10401 UpdateWholeMBBFlags(MI);
10402 if (!AreAllUnsafeRegsDead()) {
10403 SaveRangeIfNonEmpty();
10404 CreateNewRangeStartingAt(MI.getIterator());
10405 continue;
10406 }
10407 LRAvailableEverywhere &= LRU.available(AArch64::LR);
10408 RangeBegin = MI.getIterator();
10409 ++RangeLen;
10410 }
10411 // The loop above misses the last (or only) range. If we are still safe, then
10412 // let's save the range.
10413 if (AreAllUnsafeRegsDead())
10414 SaveRangeIfNonEmpty();
10415 if (Ranges.empty())
10416 return Ranges;
10417 // We found the ranges bottom-up, but the mapping expects them top-down, so
10418 // reverse the order.
10419 std::reverse(Ranges.begin(), Ranges.end());
10420 // If there is at least one outlinable range where LR is unavailable
10421 // somewhere, remember that.
10422 if (!LRAvailableEverywhere)
10424 return Ranges;
10425}
10426
10428AArch64InstrInfo::getOutliningTypeImpl(const MachineModuleInfo &MMI,
10430 unsigned Flags) const {
10431 MachineInstr &MI = *MIT;
10432
10433 // Don't outline anything used for return address signing. The outlined
10434 // function will get signed later if needed
10435 switch (MI.getOpcode()) {
10436 case AArch64::PACM:
10437 case AArch64::PACIASP:
10438 case AArch64::PACIBSP:
10439 case AArch64::PACIASPPC:
10440 case AArch64::PACIBSPPC:
10441 case AArch64::AUTIASP:
10442 case AArch64::AUTIBSP:
10443 case AArch64::AUTIASPPCi:
10444 case AArch64::AUTIASPPCr:
10445 case AArch64::AUTIBSPPCi:
10446 case AArch64::AUTIBSPPCr:
10447 case AArch64::RETAA:
10448 case AArch64::RETAB:
10449 case AArch64::RETAASPPCi:
10450 case AArch64::RETAASPPCr:
10451 case AArch64::RETABSPPCi:
10452 case AArch64::RETABSPPCr:
10453 case AArch64::EMITBKEY:
10454 case AArch64::PAUTH_PROLOGUE:
10455 case AArch64::PAUTH_EPILOGUE:
10457 }
10458
10459 // We can only outline these if we will tail call the outlined function, or
10460 // fix up the CFI offsets. Currently, CFI instructions are outlined only if
10461 // in a tail call.
10462 //
10463 // FIXME: If the proper fixups for the offset are implemented, this should be
10464 // possible.
10465 if (MI.isCFIInstruction())
10467
10468 // Is this a terminator for a basic block?
10469 if (MI.isTerminator())
10470 // TargetInstrInfo::getOutliningType has already filtered out anything
10471 // that would break this, so we can allow it here.
10473
10474 // Make sure none of the operands are un-outlinable.
10475 for (const MachineOperand &MOP : MI.operands()) {
10476 // A check preventing CFI indices was here before, but only CFI
10477 // instructions should have those.
10478 assert(!MOP.isCFIIndex());
10479
10480 // If it uses LR or W30 explicitly, then don't touch it.
10481 if (MOP.isReg() && !MOP.isImplicit() &&
10482 (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
10484 }
10485
10486 // Special cases for instructions that can always be outlined, but will fail
10487 // the later tests, e.g. ADRPs, which are PC-relative use LR, but can always
10488 // be outlined because they don't require a *specific* value to be in LR.
10489 if (MI.getOpcode() == AArch64::ADRP)
10491
10492 // If MI is a call we might be able to outline it. We don't want to outline
10493 // any calls that rely on the position of items on the stack. When we outline
10494 // something containing a call, we have to emit a save and restore of LR in
10495 // the outlined function. Currently, this always happens by saving LR to the
10496 // stack. Thus, if we outline, say, half the parameters for a function call
10497 // plus the call, then we'll break the callee's expectations for the layout
10498 // of the stack.
10499 //
10500 // FIXME: Allow calls to functions which construct a stack frame, as long
10501 // as they don't access arguments on the stack.
10502 // FIXME: Figure out some way to analyze functions defined in other modules.
10503 // We should be able to compute the memory usage based on the IR calling
10504 // convention, even if we can't see the definition.
10505 if (MI.isCall()) {
10506 // Get the function associated with the call. Look at each operand and find
10507 // the one that represents the callee and get its name.
10508 const Function *Callee = nullptr;
10509 for (const MachineOperand &MOP : MI.operands()) {
10510 if (MOP.isGlobal()) {
10511 Callee = dyn_cast<Function>(MOP.getGlobal());
10512 break;
10513 }
10514 }
10515
10516 // Never outline calls to mcount. There isn't any rule that would require
10517 // this, but the Linux kernel's "ftrace" feature depends on it.
10518 if (Callee && Callee->getName() == "\01_mcount")
10520
10521 // If we don't know anything about the callee, assume it depends on the
10522 // stack layout of the caller. In that case, it's only legal to outline
10523 // as a tail-call. Explicitly list the call instructions we know about so we
10524 // don't get unexpected results with call pseudo-instructions.
10525 auto UnknownCallOutlineType = outliner::InstrType::Illegal;
10526 if (MI.getOpcode() == AArch64::BLR ||
10527 MI.getOpcode() == AArch64::BLRNoIP || MI.getOpcode() == AArch64::BL)
10528 UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
10529
10530 if (!Callee)
10531 return UnknownCallOutlineType;
10532
10533 // We have a function we have information about. Check if it's something we
10534 // can safely outline.
10535 MachineFunction *CalleeMF = MMI.getMachineFunction(*Callee);
10536
10537 // We don't know what's going on with the callee at all. Don't touch it.
10538 if (!CalleeMF)
10539 return UnknownCallOutlineType;
10540
10541 // Check if we know anything about the callee saves on the function. If we
10542 // don't, then don't touch it, since that implies that we haven't
10543 // computed anything about its stack frame yet.
10544 MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
10545 if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
10546 MFI.getNumObjects() > 0)
10547 return UnknownCallOutlineType;
10548
10549 // At this point, we can say that CalleeMF ought to not pass anything on the
10550 // stack. Therefore, we can outline it.
10551 return outliner::InstrType::Legal;
10552 }
10553
10554 // Don't touch the link register or W30.
10555 if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
10556 MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
10557 return outliner::InstrType::Illegal;
10558
10559 // Don't outline BTI instructions, because that will prevent the outlining
10560 // site from being indirectly callable.
10561 if (hasBTISemantics(MI))
10562 return outliner::InstrType::Illegal;
10563
10564 return outliner::InstrType::Legal;
10565}
10566
10567void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
10568 for (MachineInstr &MI : MBB) {
10569 const MachineOperand *Base;
10570 TypeSize Width(0, false);
10571 int64_t Offset;
10572 bool OffsetIsScalable;
10573
10574 // Is this a load or store with an immediate offset with SP as the base?
10575 if (!MI.mayLoadOrStore() ||
10576 !getMemOperandWithOffsetWidth(MI, Base, Offset, OffsetIsScalable, Width,
10577 &RI) ||
10578 (Base->isReg() && Base->getReg() != AArch64::SP))
10579 continue;
10580
10581 // It is, so we have to fix it up.
10582 TypeSize Scale(0U, false);
10583 int64_t Dummy1, Dummy2;
10584
10585 MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
10586 assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
10587 getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
10588 assert(Scale != 0 && "Unexpected opcode!");
10589 assert(!OffsetIsScalable && "Expected offset to be a byte offset");
10590
10591 // We've pushed the return address to the stack, so add 16 to the offset.
10592 // This is safe, since we already checked if it would overflow when we
10593 // checked if this instruction was legal to outline.
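// For example, an LDRXui `ldr x0, [sp, #8]` has Scale == 8 and a byte Offset
// of 8; after the 16-byte LR save it must address [sp, #24], so its scaled
// immediate becomes (8 + 16) / 8 == 3.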
10594 int64_t NewImm = (Offset + 16) / (int64_t)Scale.getFixedValue();
10595 StackOffsetOperand.setImm(NewImm);
10596 }
10597}
10598
10599 static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB,
10600 const AArch64InstrInfo *TII,
10601 bool ShouldSignReturnAddr) {
10602 if (!ShouldSignReturnAddr)
10603 return;
10604
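// PAUTH_PROLOGUE / PAUTH_EPILOGUE are pseudos; they are expanded later by the
// pointer-authentication lowering (e.g. into PACIASP / AUTIASP or their B-key
// variants, depending on the configured signing key).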
10605 BuildMI(MBB, MBB.begin(), DebugLoc(), TII->get(AArch64::PAUTH_PROLOGUE))
10606 .setMIFlag(MachineInstr::FrameSetup);
10607 BuildMI(MBB, MBB.getFirstInstrTerminator(), DebugLoc(),
10608 TII->get(AArch64::PAUTH_EPILOGUE))
10609 .setMIFlag(MachineInstr::FrameDestroy);
10610}
10611
10612void AArch64InstrInfo::buildOutlinedFrame(
10613 MachineBasicBlock &MBB, MachineFunction &MF,
10614 const outliner::OutlinedFunction &OF) const {
10615
10616 AArch64FunctionInfo *FI = MF.getInfo<AArch64FunctionInfo>();
10617
10618 if (OF.FrameConstructionID == MachineOutlinerTailCall)
10619 FI->setOutliningStyle("Tail Call");
10620 else if (OF.FrameConstructionID == MachineOutlinerThunk) {
10621 // For thunk outlining, rewrite the last instruction from a call to a
10622 // tail-call.
10623 MachineInstr *Call = &*--MBB.instr_end();
10624 unsigned TailOpcode;
10625 if (Call->getOpcode() == AArch64::BL) {
10626 TailOpcode = AArch64::TCRETURNdi;
10627 } else {
10628 assert(Call->getOpcode() == AArch64::BLR ||
10629 Call->getOpcode() == AArch64::BLRNoIP);
10630 TailOpcode = AArch64::TCRETURNriALL;
10631 }
10632 MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
10633 .add(Call->getOperand(0))
10634 .addImm(0);
10635 MBB.insert(MBB.end(), TC);
10636 Call->eraseFromParent();
10637
10638 FI->setOutliningStyle("Thunk");
10639 }
10640
10641 bool IsLeafFunction = true;
10642
10643 // Is there a call in the outlined range?
10644 auto IsNonTailCall = [](const MachineInstr &MI) {
10645 return MI.isCall() && !MI.isReturn();
10646 };
10647
10648 if (llvm::any_of(MBB.instrs(), IsNonTailCall)) {
10649 // Fix up the instructions in the range, since we're going to modify the
10650 // stack.
10651
10652 // Bugzilla ID: 46767
10653 // TODO: Check if fixing up twice is safe so we can outline these.
10654 assert(OF.FrameConstructionID != MachineOutlinerDefault &&
10655 "Can only fix up stack references once");
10656 fixupPostOutline(MBB);
10657
10658 IsLeafFunction = false;
10659
10660 // LR has to be a live in so that we can save it.
10661 if (!MBB.isLiveIn(AArch64::LR))
10662 MBB.addLiveIn(AArch64::LR);
10663
10664 MachineBasicBlock::iterator It = MBB.begin();
10665 MachineBasicBlock::iterator Et = MBB.end();
10666
10667 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
10668 OF.FrameConstructionID == MachineOutlinerThunk)
10669 Et = std::prev(MBB.end());
10670
10671 // Insert a save before the outlined region
10672 MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
10673 .addReg(AArch64::SP, RegState::Define)
10674 .addReg(AArch64::LR)
10675 .addReg(AArch64::SP)
10676 .addImm(-16);
10677 It = MBB.insert(It, STRXpre);
10678
10679 if (MF.getInfo<AArch64FunctionInfo>()->needsDwarfUnwindInfo(MF)) {
10680 CFIInstBuilder CFIBuilder(MBB, It, MachineInstr::FrameSetup);
10681
10682 // Add a CFI saying the stack was moved 16 B down.
10683 CFIBuilder.buildDefCFAOffset(16);
10684
10685 // Add a CFI saying that the LR that we want to find is now 16 B higher
10686 // than before.
10687 CFIBuilder.buildOffset(AArch64::LR, -16);
10688 }
10689
10690 // Insert a restore before the terminator for the function.
10691 MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
10692 .addReg(AArch64::SP, RegState::Define)
10693 .addReg(AArch64::LR, RegState::Define)
10694 .addReg(AArch64::SP)
10695 .addImm(16);
10696 Et = MBB.insert(Et, LDRXpost);
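// Taken together, the two instructions above bracket the outlined body with
// `str x30, [sp, #-16]!` ... `ldr x30, [sp], #16`.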
10697 }
10698
10699 auto RASignCondition = FI->getSignReturnAddressCondition();
10700 bool ShouldSignReturnAddr = AArch64FunctionInfo::shouldSignReturnAddress(
10701 RASignCondition, !IsLeafFunction);
10702
10703 // If this is a tail call outlined function, then there's already a return.
10704 if (OF.FrameConstructionID == MachineOutlinerTailCall ||
10705 OF.FrameConstructionID == MachineOutlinerThunk) {
10706 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
10707 return;
10708 }
10709
10710 // It's not a tail call, so we have to insert the return ourselves.
10711
10712 // LR has to be a live in so that we can return to it.
10713 if (!MBB.isLiveIn(AArch64::LR))
10714 MBB.addLiveIn(AArch64::LR);
10715
10716 MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
10717 .addReg(AArch64::LR);
10718 MBB.insert(MBB.end(), ret);
10719
10720 signOutlinedFunction(MF, MBB, this, ShouldSignReturnAddr);
10721
10722 FI->setOutliningStyle("Function");
10723
10724 // Did we have to modify the stack by saving the link register?
10725 if (OF.FrameConstructionID != MachineOutlinerDefault)
10726 return;
10727
10728 // We modified the stack.
10729 // Walk over the basic block and fix up all the stack accesses.
10730 fixupPostOutline(MBB);
10731}
10732
10733MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
10734 Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
10735 MachineFunction &MF, outliner::Candidate &C) const {
10736
10737 // Are we tail calling?
10738 if (C.CallConstructionID == MachineOutlinerTailCall) {
10739 // If yes, then we can just branch to the label.
10740 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
10741 .addGlobalAddress(M.getNamedValue(MF.getName()))
10742 .addImm(0));
10743 return It;
10744 }
10745
10746 // Are we saving the link register?
10747 if (C.CallConstructionID == MachineOutlinerNoLRSave ||
10748 C.CallConstructionID == MachineOutlinerThunk) {
10749 // No, so just insert the call.
10750 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
10751 .addGlobalAddress(M.getNamedValue(MF.getName())));
10752 return It;
10753 }
10754
10755 // We want to return the spot where we inserted the call.
10756 MachineBasicBlock::iterator CallPt;
10757
10758 // Instructions for saving and restoring LR around the call instruction we're
10759 // going to insert.
10760 MachineInstr *Save;
10761 MachineInstr *Restore;
10762 // Can we save to a register?
10763 if (C.CallConstructionID == MachineOutlinerRegSave) {
10764 // FIXME: This logic should be sunk into a target-specific interface so that
10765 // we don't have to recompute the register.
10766 Register Reg = findRegisterToSaveLRTo(C);
10767 assert(Reg && "No callee-saved register available?");
10768
10769 // LR has to be a live in so that we can save it.
10770 if (!MBB.isLiveIn(AArch64::LR))
10771 MBB.addLiveIn(AArch64::LR);
10772
10773 // Save and restore LR from Reg.
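// (ORRXrs with XZR and shift 0 is the canonical encoding of `mov xN, x30`
// and `mov x30, xN`.)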
10774 Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
10775 .addReg(AArch64::XZR)
10776 .addReg(AArch64::LR)
10777 .addImm(0);
10778 Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
10779 .addReg(AArch64::XZR)
10780 .addReg(Reg)
10781 .addImm(0);
10782 } else {
10783 // We have the default case. Save and restore from SP.
10784 Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
10785 .addReg(AArch64::SP, RegState::Define)
10786 .addReg(AArch64::LR)
10787 .addReg(AArch64::SP)
10788 .addImm(-16);
10789 Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
10790 .addReg(AArch64::SP, RegState::Define)
10791 .addReg(AArch64::LR, RegState::Define)
10792 .addReg(AArch64::SP)
10793 .addImm(16);
10794 }
10795
10796 It = MBB.insert(It, Save);
10797 It++;
10798
10799 // Insert the call.
10800 It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
10801 .addGlobalAddress(M.getNamedValue(MF.getName())));
10802 CallPt = It;
10803 It++;
10804
10805 It = MBB.insert(It, Restore);
10806 return CallPt;
10807}
10808
10809bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
10810 MachineFunction &MF) const {
10811 return MF.getFunction().hasMinSize();
10812}
10813
10814void AArch64InstrInfo::buildClearRegister(Register Reg, MachineBasicBlock &MBB,
10815 MachineBasicBlock::iterator Iter,
10816 DebugLoc &DL,
10817 bool AllowSideEffects) const {
10818 const MachineFunction &MF = *MBB.getParent();
10819 const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
10820 const AArch64RegisterInfo &TRI = *STI.getRegisterInfo();
10821
10822 if (TRI.isGeneralPurposeRegister(MF, Reg)) {
10823 BuildMI(MBB, Iter, DL, get(AArch64::MOVZXi), Reg).addImm(0).addImm(0);
10824 } else if (STI.isSVEorStreamingSVEAvailable()) {
10825 BuildMI(MBB, Iter, DL, get(AArch64::DUP_ZI_D), Reg)
10826 .addImm(0)
10827 .addImm(0);
10828 } else if (STI.isNeonAvailable()) {
10829 BuildMI(MBB, Iter, DL, get(AArch64::MOVIv2d_ns), Reg)
10830 .addImm(0);
10831 } else {
10832 // This is a streaming-compatible function without SVE. We don't have full
10833 // Neon (just FPRs), so we can at most use the first 64-bit sub-register.
10834 // Since `movi v..` would be illegal here, use `fmov d..` instead.
10835 assert(STI.hasNEON() && "Expected to have NEON.");
10836 Register Reg64 = TRI.getSubReg(Reg, AArch64::dsub);
10837 BuildMI(MBB, Iter, DL, get(AArch64::FMOVD0), Reg64);
10838 }
10839}
10840
10841std::optional<DestSourcePair>
10842 AArch64InstrInfo::isCopyInstrImpl(const MachineInstr &MI) const {
10843
10844 // AArch64::ORRWrs and AArch64::ORRXrs with a WZR/XZR source register
10845 // and a zero shift immediate are used as aliases for the mov instruction.
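// For example, `orr w0, wzr, w1` (shift 0) is treated as `mov w0, w1`, and
// `orr x0, xzr, x1` as `mov x0, x1`.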
10846 if (((MI.getOpcode() == AArch64::ORRWrs &&
10847 MI.getOperand(1).getReg() == AArch64::WZR &&
10848 MI.getOperand(3).getImm() == 0x0) ||
10849 (MI.getOpcode() == AArch64::ORRWrr &&
10850 MI.getOperand(1).getReg() == AArch64::WZR)) &&
10851 // Check that the w->w move is not a zero-extending w->x mov.
10852 (!MI.getOperand(0).getReg().isVirtual() ||
10853 MI.getOperand(0).getSubReg() == 0) &&
10854 (!MI.getOperand(0).getReg().isPhysical() ||
10855 MI.findRegisterDefOperandIdx(getXRegFromWReg(MI.getOperand(0).getReg()),
10856 /*TRI=*/nullptr) == -1))
10857 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10858
10859 if (MI.getOpcode() == AArch64::ORRXrs &&
10860 MI.getOperand(1).getReg() == AArch64::XZR &&
10861 MI.getOperand(3).getImm() == 0x0)
10862 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10863
10864 return std::nullopt;
10865}
10866
10867std::optional<DestSourcePair>
10868 AArch64InstrInfo::isCopyLikeInstrImpl(const MachineInstr &MI) const {
10869 if ((MI.getOpcode() == AArch64::ORRWrs &&
10870 MI.getOperand(1).getReg() == AArch64::WZR &&
10871 MI.getOperand(3).getImm() == 0x0) ||
10872 (MI.getOpcode() == AArch64::ORRWrr &&
10873 MI.getOperand(1).getReg() == AArch64::WZR))
10874 return DestSourcePair{MI.getOperand(0), MI.getOperand(2)};
10875 return std::nullopt;
10876}
10877
10878std::optional<RegImmPair>
10879AArch64InstrInfo::isAddImmediate(const MachineInstr &MI, Register Reg) const {
10880 int Sign = 1;
10881 int64_t Offset = 0;
10882
10883 // TODO: Handle cases where Reg is a super- or sub-register of the
10884 // destination register.
10885 const MachineOperand &Op0 = MI.getOperand(0);
10886 if (!Op0.isReg() || Reg != Op0.getReg())
10887 return std::nullopt;
10888
10889 switch (MI.getOpcode()) {
10890 default:
10891 return std::nullopt;
10892 case AArch64::SUBWri:
10893 case AArch64::SUBXri:
10894 case AArch64::SUBSWri:
10895 case AArch64::SUBSXri:
10896 Sign *= -1;
10897 [[fallthrough]];
10898 case AArch64::ADDSWri:
10899 case AArch64::ADDSXri:
10900 case AArch64::ADDWri:
10901 case AArch64::ADDXri: {
10902 // TODO: Third operand can be global address (usually some string).
10903 if (!MI.getOperand(0).isReg() || !MI.getOperand(1).isReg() ||
10904 !MI.getOperand(2).isImm())
10905 return std::nullopt;
10906 int Shift = MI.getOperand(3).getImm();
10907 assert((Shift == 0 || Shift == 12) && "Shift can be either 0 or 12");
10908 Offset = Sign * (MI.getOperand(2).getImm() << Shift);
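// E.g. `add x0, x1, #16, lsl #12` (ADDXri) yields {x1, 16 << 12 == 65536};
// the SUB forms produce the negated offset.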
10909 }
10910 }
10911 return RegImmPair{MI.getOperand(1).getReg(), Offset};
10912}
10913
10914/// If the given ORR instruction is a copy, and \p DescribedReg overlaps with
10915/// the destination register then, if possible, describe the value in terms of
10916/// the source register.
10917static std::optional<ParamLoadedValue>
10918 describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg,
10919 const TargetInstrInfo *TII,
10920 const TargetRegisterInfo *TRI) {
10921 auto DestSrc = TII->isCopyLikeInstr(MI);
10922 if (!DestSrc)
10923 return std::nullopt;
10924
10925 Register DestReg = DestSrc->Destination->getReg();
10926 Register SrcReg = DestSrc->Source->getReg();
10927
10928 if (!DestReg.isValid() || !SrcReg.isValid())
10929 return std::nullopt;
10930
10931 auto Expr = DIExpression::get(MI.getMF()->getFunction().getContext(), {});
10932
10933 // If the described register is the destination, just return the source.
10934 if (DestReg == DescribedReg)
10935 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
10936
10937 // ORRWrs zero-extends to 64-bits, so we need to consider such cases.
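// E.g. if x0 is asked for and the copy is `mov w0, w1` (ORRWrs), the loaded
// value can be described as w1, since the upper 32 bits are known to be zero.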
10938 if (MI.getOpcode() == AArch64::ORRWrs &&
10939 TRI->isSuperRegister(DestReg, DescribedReg))
10940 return ParamLoadedValue(MachineOperand::CreateReg(SrcReg, false), Expr);
10941
10942 // We may need to describe the lower part of a ORRXrs move.
10943 if (MI.getOpcode() == AArch64::ORRXrs &&
10944 TRI->isSubRegister(DestReg, DescribedReg)) {
10945 Register SrcSubReg = TRI->getSubReg(SrcReg, AArch64::sub_32);
10946 return ParamLoadedValue(MachineOperand::CreateReg(SrcSubReg, false), Expr);
10947 }
10948
10949 assert(!TRI->isSuperOrSubRegisterEq(DestReg, DescribedReg) &&
10950 "Unhandled ORR[XW]rs copy case");
10951
10952 return std::nullopt;
10953}
10954
10955bool AArch64InstrInfo::isFunctionSafeToSplit(const MachineFunction &MF) const {
10956 // Functions cannot be split to different sections on AArch64 if they have
10957 // a red zone. This is because relaxing a cross-section branch may require
10958 // incrementing the stack pointer to spill a register, which would overwrite
10959 // the red zone.
10960 if (MF.getInfo<AArch64FunctionInfo>()->hasRedZone().value_or(true))
10961 return false;
10962
10963 return TargetInstrInfo::isFunctionSafeToSplit(MF);
10964}
10965
10966bool AArch64InstrInfo::isMBBSafeToSplitToCold(
10967 const MachineBasicBlock &MBB) const {
10968 // Asm Goto blocks can contain conditional branches to goto labels, which can
10969 // get moved out of range of the branch instruction.
10970 auto isAsmGoto = [](const MachineInstr &MI) {
10971 return MI.getOpcode() == AArch64::INLINEASM_BR;
10972 };
10973 if (llvm::any_of(MBB, isAsmGoto) || MBB.isInlineAsmBrIndirectTarget())
10974 return false;
10975
10976 // Because jump tables are label-relative instead of table-relative, they all
10977 // must be in the same section or relocation fixup handling will fail.
10978
10979 // Check if MBB is a jump table target
10980 const MachineJumpTableInfo *MJTI = MBB.getParent()->getJumpTableInfo();
10981 auto containsMBB = [&MBB](const MachineJumpTableEntry &JTE) {
10982 return llvm::is_contained(JTE.MBBs, &MBB);
10983 };
10984 if (MJTI != nullptr && llvm::any_of(MJTI->getJumpTables(), containsMBB))
10985 return false;
10986
10987 // Check if MBB contains a jump table lookup
10988 for (const MachineInstr &MI : MBB) {
10989 switch (MI.getOpcode()) {
10990 case TargetOpcode::G_BRJT:
10991 case AArch64::JumpTableDest32:
10992 case AArch64::JumpTableDest16:
10993 case AArch64::JumpTableDest8:
10994 return false;
10995 default:
10996 continue;
10997 }
10998 }
10999
11000 // MBB isn't a special case, so it's safe to be split to the cold section.
11001 return true;
11002}
11003
11004std::optional<ParamLoadedValue>
11005AArch64InstrInfo::describeLoadedValue(const MachineInstr &MI,
11006 Register Reg) const {
11007 const MachineFunction *MF = MI.getMF();
11008 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
11009 switch (MI.getOpcode()) {
11010 case AArch64::MOVZWi:
11011 case AArch64::MOVZXi: {
11012 // MOVZWi may be used for producing zero-extended 32-bit immediates in
11013 // 64-bit parameters, so we need to consider super-registers.
11014 if (!TRI->isSuperRegisterEq(MI.getOperand(0).getReg(), Reg))
11015 return std::nullopt;
11016
11017 if (!MI.getOperand(1).isImm())
11018 return std::nullopt;
11019 int64_t Immediate = MI.getOperand(1).getImm();
11020 int Shift = MI.getOperand(2).getImm();
11021 return ParamLoadedValue(MachineOperand::CreateImm(Immediate << Shift),
11022 nullptr);
11023 }
11024 case AArch64::ORRWrs:
11025 case AArch64::ORRXrs:
11026 return describeORRLoadedValue(MI, Reg, this, TRI);
11027 }
11028
11029 return TargetInstrInfo::describeLoadedValue(MI, Reg);
11030}
11031
11032bool AArch64InstrInfo::isExtendLikelyToBeFolded(
11033 MachineInstr &ExtMI, MachineRegisterInfo &MRI) const {
11034 assert(ExtMI.getOpcode() == TargetOpcode::G_SEXT ||
11035 ExtMI.getOpcode() == TargetOpcode::G_ZEXT ||
11036 ExtMI.getOpcode() == TargetOpcode::G_ANYEXT);
11037
11038 // Anyexts are nops.
11039 if (ExtMI.getOpcode() == TargetOpcode::G_ANYEXT)
11040 return true;
11041
11042 Register DefReg = ExtMI.getOperand(0).getReg();
11043 if (!MRI.hasOneNonDBGUse(DefReg))
11044 return false;
11045
11046 // It's likely that a sext/zext as a G_PTR_ADD offset will be folded into an
11047 // addressing mode.
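// E.g. a zext feeding a G_PTR_ADD can usually be selected as an extended
// register offset such as `ldr x0, [x1, w2, uxtw]`.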
11048 auto *UserMI = &*MRI.use_instr_nodbg_begin(DefReg);
11049 return UserMI->getOpcode() == TargetOpcode::G_PTR_ADD;
11050}
11051
11052uint64_t AArch64InstrInfo::getElementSizeForOpcode(unsigned Opc) const {
11053 return get(Opc).TSFlags & AArch64::ElementSizeMask;
11054}
11055
11056bool AArch64InstrInfo::isPTestLikeOpcode(unsigned Opc) const {
11057 return get(Opc).TSFlags & AArch64::InstrFlagIsPTestLike;
11058}
11059
11060bool AArch64InstrInfo::isWhileOpcode(unsigned Opc) const {
11061 return get(Opc).TSFlags & AArch64::InstrFlagIsWhile;
11062}
11063
11064unsigned int
11065AArch64InstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
11066 return OptLevel >= CodeGenOptLevel::Aggressive ? 6 : 2;
11067}
11068
11069bool AArch64InstrInfo::isLegalAddressingMode(unsigned NumBytes, int64_t Offset,
11070 unsigned Scale) const {
11071 if (Offset && Scale)
11072 return false;
11073
11074 // Check Reg + Imm
11075 if (!Scale) {
11076 // 9-bit signed offset
11077 if (isInt<9>(Offset))
11078 return true;
11079
11080 // 12-bit unsigned offset
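// (scaled by the access size: e.g. for an 8-byte access, byte offsets
// 0..32760 that are multiples of 8 are encodable)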
11081 unsigned Shift = Log2_64(NumBytes);
11082 if (NumBytes && Offset > 0 && (Offset / NumBytes) <= (1LL << 12) - 1 &&
11083 // Must be a multiple of NumBytes (NumBytes is a power of 2)
11084 (Offset >> Shift) << Shift == Offset)
11085 return true;
11086 return false;
11087 }
11088
11089 // Check reg1 + SIZE_IN_BYTES * reg2 and reg1 + reg2
11090 return Scale == 1 || (Scale > 0 && Scale == NumBytes);
11091}
11092
11093 unsigned llvm::getBLRCallOpcode(const MachineFunction &MF) {
11094 if (MF.getSubtarget<AArch64Subtarget>().hardenSlsBlr())
11095 return AArch64::BLRNoIP;
11096 else
11097 return AArch64::BLR;
11098}
11099
11100 MachineBasicBlock::iterator
11101 AArch64InstrInfo::probedStackAlloc(MachineBasicBlock::iterator MBBI,
11102 Register TargetReg, bool FrameSetup) const {
11103 assert(TargetReg != AArch64::SP && "New top of stack cannot already be in SP");
11104
11105 MachineBasicBlock &MBB = *MBBI->getParent();
11106 MachineFunction &MF = *MBB.getParent();
11107 const AArch64InstrInfo *TII =
11108 MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
11109 int64_t ProbeSize = MF.getInfo<AArch64FunctionInfo>()->getStackProbeSize();
11110 DebugLoc DL = MBB.findDebugLoc(MBBI);
11111
11112 MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
11113 MachineBasicBlock *LoopTestMBB =
11114 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
11115 MF.insert(MBBInsertPoint, LoopTestMBB);
11116 MachineBasicBlock *LoopBodyMBB =
11117 MF.CreateMachineBasicBlock(MBB.getBasicBlock());
11118 MF.insert(MBBInsertPoint, LoopBodyMBB);
11119 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(MBB.getBasicBlock());
11120 MF.insert(MBBInsertPoint, ExitMBB);
11121 MachineInstr::MIFlag Flags =
11122 FrameSetup ? MachineInstr::FrameSetup : MachineInstr::NoFlags;
11123
11124 // LoopTest:
11125 // SUB SP, SP, #ProbeSize
11126 emitFrameOffset(*LoopTestMBB, LoopTestMBB->end(), DL, AArch64::SP,
11127 AArch64::SP, StackOffset::getFixed(-ProbeSize), TII, Flags);
11128
11129 // CMP SP, TargetReg
11130 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::SUBSXrx64),
11131 AArch64::XZR)
11132 .addReg(AArch64::SP)
11133 .addReg(TargetReg)
11134 .addImm(AArch64_AM::getArithExtendImm(AArch64_AM::UXTX, 0))
11135 .setMIFlags(Flags);
11136
11137 // B.<Cond> LoopExit
11138 BuildMI(*LoopTestMBB, LoopTestMBB->end(), DL, TII->get(AArch64::Bcc))
11139 .addImm(AArch64CC::LE)
11140 .addMBB(ExitMBB)
11141 .setMIFlags(Flags);
11142
11143 // STR XZR, [SP]
11144 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::STRXui))
11145 .addReg(AArch64::XZR)
11146 .addReg(AArch64::SP)
11147 .addImm(0)
11148 .setMIFlags(Flags);
11149
11150 // B loop
11151 BuildMI(*LoopBodyMBB, LoopBodyMBB->end(), DL, TII->get(AArch64::B))
11152 .addMBB(LoopTestMBB)
11153 .setMIFlags(Flags);
11154
11155 // LoopExit:
11156 // MOV SP, TargetReg
11157 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::ADDXri), AArch64::SP)
11158 .addReg(TargetReg)
11159 .addImm(0)
11160 .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
11161 .setMIFlags(Flags);
11162
11163 // LDR XZR, [SP]
11164 BuildMI(*ExitMBB, ExitMBB->end(), DL, TII->get(AArch64::LDRXui))
11165 .addReg(AArch64::XZR, RegState::Define)
11166 .addReg(AArch64::SP)
11167 .addImm(0)
11168 .setMIFlags(Flags);
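// The load from the new SP above probes the final (possibly partial) page,
// which the probing loop itself may not have touched before exiting.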
11169
11170 ExitMBB->splice(ExitMBB->end(), &MBB, std::next(MBBI), MBB.end());
11171 ExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
11172
11173 LoopTestMBB->addSuccessor(ExitMBB);
11174 LoopTestMBB->addSuccessor(LoopBodyMBB);
11175 LoopBodyMBB->addSuccessor(LoopTestMBB);
11176 MBB.addSuccessor(LoopTestMBB);
11177
11178 // Update liveins.
11179 if (MF.getRegInfo().reservedRegsFrozen())
11180 fullyRecomputeLiveIns({ExitMBB, LoopBodyMBB, LoopTestMBB});
11181
11182 return ExitMBB->begin();
11183}
11184
11185namespace {
11186class AArch64PipelinerLoopInfo : public TargetInstrInfo::PipelinerLoopInfo {
11187 MachineFunction *MF;
11188 const TargetInstrInfo *TII;
11189 const TargetRegisterInfo *TRI;
11190 MachineRegisterInfo &MRI;
11191
11192 /// The block of the loop
11193 MachineBasicBlock *LoopBB;
11194 /// The conditional branch of the loop
11195 MachineInstr *CondBranch;
11196 /// The compare instruction for loop control
11197 MachineInstr *Comp;
11198 /// The number of the operand of the loop counter value in Comp
11199 unsigned CompCounterOprNum;
11200 /// The instruction that updates the loop counter value
11201 MachineInstr *Update;
11202 /// The number of the operand of the loop counter value in Update
11203 unsigned UpdateCounterOprNum;
11204 /// The initial value of the loop counter
11205 Register Init;
11206 /// True iff Update is a predecessor of Comp
11207 bool IsUpdatePriorComp;
11208
11209 /// The normalized condition used by createTripCountGreaterCondition()
11210 SmallVector<MachineOperand, 4> Cond;
11211
11212public:
11213 AArch64PipelinerLoopInfo(MachineBasicBlock *LoopBB, MachineInstr *CondBranch,
11214 MachineInstr *Comp, unsigned CompCounterOprNum,
11215 MachineInstr *Update, unsigned UpdateCounterOprNum,
11216 Register Init, bool IsUpdatePriorComp,
11217 const SmallVectorImpl<MachineOperand> &Cond)
11218 : MF(Comp->getParent()->getParent()),
11219 TII(MF->getSubtarget().getInstrInfo()),
11220 TRI(MF->getSubtarget().getRegisterInfo()), MRI(MF->getRegInfo()),
11221 LoopBB(LoopBB), CondBranch(CondBranch), Comp(Comp),
11222 CompCounterOprNum(CompCounterOprNum), Update(Update),
11223 UpdateCounterOprNum(UpdateCounterOprNum), Init(Init),
11224 IsUpdatePriorComp(IsUpdatePriorComp), Cond(Cond.begin(), Cond.end()) {}
11225
11226 bool shouldIgnoreForPipelining(const MachineInstr *MI) const override {
11227 // Make the instructions for loop control be placed in stage 0.
11228 // The predecessors of Comp are considered by the caller.
11229 return MI == Comp;
11230 }
11231
11232 std::optional<bool> createTripCountGreaterCondition(
11233 int TC, MachineBasicBlock &MBB,
11234 SmallVectorImpl<MachineOperand> &CondParam) override {
11235 // A branch instruction will be inserted as "if (Cond) goto epilogue".
11236 // Cond is normalized for such use.
11237 // The predecessors of the branch are assumed to have already been inserted.
11238 CondParam = Cond;
11239 return {};
11240 }
11241
11242 void createRemainingIterationsGreaterCondition(
11243 int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
11244 DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) override;
11245
11246 void setPreheader(MachineBasicBlock *NewPreheader) override {}
11247
11248 void adjustTripCount(int TripCountAdjust) override {}
11249
11250 bool isMVEExpanderSupported() override { return true; }
11251};
11252} // namespace
11253
11254/// Clone an instruction from MI. The register of ReplaceOprNum-th operand
11255/// is replaced by ReplaceReg. The output register is newly created.
11256/// The other operands are unchanged from MI.
11257static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum,
11258 Register ReplaceReg, MachineBasicBlock &MBB,
11259 MachineBasicBlock::iterator InsertTo) {
11260 MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
11261 const TargetInstrInfo *TII = MBB.getParent()->getSubtarget().getInstrInfo();
11262 MachineInstr *NewMI = MBB.getParent()->CloneMachineInstr(MI);
11263 Register Result = 0;
11264 for (unsigned I = 0; I < NewMI->getNumOperands(); ++I) {
11265 if (I == 0 && NewMI->getOperand(0).getReg().isVirtual()) {
11266 Result = MRI.createVirtualRegister(
11267 MRI.getRegClass(NewMI->getOperand(0).getReg()));
11268 NewMI->getOperand(I).setReg(Result);
11269 } else if (I == ReplaceOprNum) {
11270 MRI.constrainRegClass(ReplaceReg, TII->getRegClass(NewMI->getDesc(), I));
11271 NewMI->getOperand(I).setReg(ReplaceReg);
11272 }
11273 }
11274 MBB.insert(InsertTo, NewMI);
11275 return Result;
11276}
11277
11278void AArch64PipelinerLoopInfo::createRemainingIterationsGreaterCondition(
11279 int TC, MachineBasicBlock &MBB, SmallVectorImpl<MachineOperand> &Cond,
11280 DenseMap<MachineInstr *, MachineInstr *> &LastStage0Insts) {
11281 // Create and accumulate conditions for next TC iterations.
11282 // Example:
11283 // SUBSXrr N, counter, implicit-def $nzcv # compare instruction for the last
11284 // # iteration of the kernel
11285 //
11286 // # insert the following instructions
11287 // cond = CSINCXr 0, 0, C, implicit $nzcv
11288 // counter = ADDXri counter, 1 # clone from this->Update
11289 // SUBSXrr n, counter, implicit-def $nzcv # clone from this->Comp
11290 // cond = CSINCXr cond, cond, C, implicit $nzcv
11291 // ... (repeat TC times)
11292 // SUBSXri cond, 0, implicit-def $nzcv
11293
11294 assert(CondBranch->getOpcode() == AArch64::Bcc);
11295 // CondCode to exit the loop
11296 AArch64CC::CondCode CC =
11297 (AArch64CC::CondCode)CondBranch->getOperand(0).getImm();
11298 if (CondBranch->getOperand(1).getMBB() == LoopBB)
11299 CC = AArch64CC::getInvertedCondCode(CC);
11300
11301 // Accumulate conditions to exit the loop
11302 Register AccCond = AArch64::XZR;
11303
11304 // If CC holds, CurCond+1 is returned; otherwise CurCond is returned.
11305 auto AccumulateCond = [&](Register CurCond,
11306 AArch64CC::CondCode CC) {
11307 Register NewCond = MRI.createVirtualRegister(&AArch64::GPR64commonRegClass);
11308 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::CSINCXr))
11309 .addReg(NewCond, RegState::Define)
11310 .addReg(CurCond)
11311 .addReg(CurCond)
11312 .addImm(AArch64CC::getInvertedCondCode(CC));
11313 return NewCond;
11314 };
11315
11316 if (!LastStage0Insts.empty() && LastStage0Insts[Comp]->getParent() == &MBB) {
11317 // Update and Comp for I == 0 already exist in MBB
11318 // (MBB is an unrolled kernel)
11319 Register Counter;
11320 for (int I = 0; I <= TC; ++I) {
11321 Register NextCounter;
11322 if (I != 0)
11323 NextCounter =
11324 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
11325
11326 AccCond = AccumulateCond(AccCond, CC);
11327
11328 if (I != TC) {
11329 if (I == 0) {
11330 if (Update != Comp && IsUpdatePriorComp) {
11331 Counter =
11332 LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
11333 NextCounter = cloneInstr(Update, UpdateCounterOprNum, Counter, MBB,
11334 MBB.end());
11335 } else {
11336 // We can use the already-calculated value.
11337 NextCounter = LastStage0Insts[Update]->getOperand(0).getReg();
11338 }
11339 } else if (Update != Comp) {
11340 NextCounter =
11341 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11342 }
11343 }
11344 Counter = NextCounter;
11345 }
11346 } else {
11347 Register Counter;
11348 if (LastStage0Insts.empty()) {
11349 // Use the initial counter value (testing whether the trip count is
11350 // sufficient to be executed by the pipelined code).
11351 Counter = Init;
11352 if (IsUpdatePriorComp)
11353 Counter =
11354 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11355 } else {
11356 // MBB is an epilogue block. LastStage0Insts[Comp] is in the kernel block.
11357 Counter = LastStage0Insts[Comp]->getOperand(CompCounterOprNum).getReg();
11358 }
11359
11360 for (int I = 0; I <= TC; ++I) {
11361 Register NextCounter;
11362 NextCounter =
11363 cloneInstr(Comp, CompCounterOprNum, Counter, MBB, MBB.end());
11364 AccCond = AccumulateCond(AccCond, CC);
11365 if (I != TC && Update != Comp)
11366 NextCounter =
11367 cloneInstr(Update, UpdateCounterOprNum, Counter, MBB, MBB.end());
11368 Counter = NextCounter;
11369 }
11370 }
11371
11372 // If AccCond == 0, the remainder is greater than TC.
11373 BuildMI(MBB, MBB.end(), Comp->getDebugLoc(), TII->get(AArch64::SUBSXri))
11374 .addReg(AArch64::XZR, RegState::Define | RegState::Dead)
11375 .addReg(AccCond)
11376 .addImm(0)
11377 .addImm(0);
11378 Cond.clear();
11379 Cond.push_back(MachineOperand::CreateImm(AArch64CC::EQ));
11380}
11381
11382static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB,
11383 Register &RegMBB, Register &RegOther) {
11384 assert(Phi.getNumOperands() == 5);
11385 if (Phi.getOperand(2).getMBB() == MBB) {
11386 RegMBB = Phi.getOperand(1).getReg();
11387 RegOther = Phi.getOperand(3).getReg();
11388 } else {
11389 assert(Phi.getOperand(4).getMBB() == MBB);
11390 RegMBB = Phi.getOperand(3).getReg();
11391 RegOther = Phi.getOperand(1).getReg();
11392 }
11393}
11394
11395 static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB) {
11396 if (!Reg.isVirtual())
11397 return false;
11398 const MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
11399 return MRI.getVRegDef(Reg)->getParent() != BB;
11400}
11401
11402/// If Reg is an induction variable, return true and set some parameters
11403static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB,
11404 MachineInstr *&UpdateInst,
11405 unsigned &UpdateCounterOprNum, Register &InitReg,
11406 bool &IsUpdatePriorComp) {
11407 // Example:
11408 //
11409 // Preheader:
11410 // InitReg = ...
11411 // LoopBB:
11412 // Reg0 = PHI (InitReg, Preheader), (Reg1, LoopBB)
11413 // Reg = COPY Reg0 ; COPY is ignored.
11414 // Reg1 = ADD Reg, #1; UpdateInst. Incremented by a loop invariant value.
11415 // ; Reg is the value calculated in the previous
11416 // ; iteration, so IsUpdatePriorComp == false.
11417
11418 if (LoopBB->pred_size() != 2)
11419 return false;
11420 if (!Reg.isVirtual())
11421 return false;
11422 const MachineRegisterInfo &MRI = LoopBB->getParent()->getRegInfo();
11423 UpdateInst = nullptr;
11424 UpdateCounterOprNum = 0;
11425 InitReg = 0;
11426 IsUpdatePriorComp = true;
11427 Register CurReg = Reg;
11428 while (true) {
11429 MachineInstr *Def = MRI.getVRegDef(CurReg);
11430 if (Def->getParent() != LoopBB)
11431 return false;
11432 if (Def->isCopy()) {
11433 // Ignore copy instructions unless they contain subregisters
11434 if (Def->getOperand(0).getSubReg() || Def->getOperand(1).getSubReg())
11435 return false;
11436 CurReg = Def->getOperand(1).getReg();
11437 } else if (Def->isPHI()) {
11438 if (InitReg != 0)
11439 return false;
11440 if (!UpdateInst)
11441 IsUpdatePriorComp = false;
11442 extractPhiReg(*Def, LoopBB, CurReg, InitReg);
11443 } else {
11444 if (UpdateInst)
11445 return false;
11446 switch (Def->getOpcode()) {
11447 case AArch64::ADDSXri:
11448 case AArch64::ADDSWri:
11449 case AArch64::SUBSXri:
11450 case AArch64::SUBSWri:
11451 case AArch64::ADDXri:
11452 case AArch64::ADDWri:
11453 case AArch64::SUBXri:
11454 case AArch64::SUBWri:
11455 UpdateInst = Def;
11456 UpdateCounterOprNum = 1;
11457 break;
11458 case AArch64::ADDSXrr:
11459 case AArch64::ADDSWrr:
11460 case AArch64::SUBSXrr:
11461 case AArch64::SUBSWrr:
11462 case AArch64::ADDXrr:
11463 case AArch64::ADDWrr:
11464 case AArch64::SUBXrr:
11465 case AArch64::SUBWrr:
11466 UpdateInst = Def;
11467 if (isDefinedOutside(Def->getOperand(2).getReg(), LoopBB))
11468 UpdateCounterOprNum = 1;
11469 else if (isDefinedOutside(Def->getOperand(1).getReg(), LoopBB))
11470 UpdateCounterOprNum = 2;
11471 else
11472 return false;
11473 break;
11474 default:
11475 return false;
11476 }
11477 CurReg = Def->getOperand(UpdateCounterOprNum).getReg();
11478 }
11479
11480 if (!CurReg.isVirtual())
11481 return false;
11482 if (Reg == CurReg)
11483 break;
11484 }
11485
11486 if (!UpdateInst)
11487 return false;
11488
11489 return true;
11490}
11491
11492std::unique_ptr<TargetInstrInfo::PipelinerLoopInfo>
11494 // Accept loops that meet the following conditions
11495 // * The conditional branch is BCC
11496 // * The compare instruction is ADDS/SUBS/WHILEXX
11497 // * One operand of the compare is an induction variable and the other is a
11498 // loop invariant value
11499 // * The induction variable is incremented/decremented by a single instruction
11500 // * Does not contain CALL or instructions which have unmodeled side effects
11501
11502 for (MachineInstr &MI : *LoopBB)
11503 if (MI.isCall() || MI.hasUnmodeledSideEffects())
11504 // This instruction may use NZCV, which interferes with the instruction to
11505 // be inserted for loop control.
11506 return nullptr;
11507
11508 MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
11509 SmallVector<MachineOperand, 4> Cond;
11510 if (analyzeBranch(*LoopBB, TBB, FBB, Cond))
11511 return nullptr;
11512
11513 // Infinite loops are not supported
11514 if (TBB == LoopBB && FBB == LoopBB)
11515 return nullptr;
11516
11517 // Must be conditional branch
11518 if (TBB != LoopBB && FBB == nullptr)
11519 return nullptr;
11520
11521 assert((TBB == LoopBB || FBB == LoopBB) &&
11522 "The Loop must be a single-basic-block loop");
11523
11524 MachineInstr *CondBranch = &*LoopBB->getFirstTerminator();
11525 const TargetRegisterInfo &TRI = getRegisterInfo();
11526
11527 if (CondBranch->getOpcode() != AArch64::Bcc)
11528 return nullptr;
11529
11530 // Normalization for createTripCountGreaterCondition()
11531 if (TBB == LoopBB)
11532 reverseBranchCondition(Cond);
11533
11534 MachineInstr *Comp = nullptr;
11535 unsigned CompCounterOprNum = 0;
11536 for (MachineInstr &MI : reverse(*LoopBB)) {
11537 if (MI.modifiesRegister(AArch64::NZCV, &TRI)) {
11538 // Guarantee that the compare is SUBS/ADDS/WHILEXX and that one of the
11539 // operands is a loop invariant value
11540
11541 switch (MI.getOpcode()) {
11542 case AArch64::SUBSXri:
11543 case AArch64::SUBSWri:
11544 case AArch64::ADDSXri:
11545 case AArch64::ADDSWri:
11546 Comp = &MI;
11547 CompCounterOprNum = 1;
11548 break;
11549 case AArch64::ADDSWrr:
11550 case AArch64::ADDSXrr:
11551 case AArch64::SUBSWrr:
11552 case AArch64::SUBSXrr:
11553 Comp = &MI;
11554 break;
11555 default:
11556 if (isWhileOpcode(MI.getOpcode())) {
11557 Comp = &MI;
11558 break;
11559 }
11560 return nullptr;
11561 }
11562
11563 if (CompCounterOprNum == 0) {
11564 if (isDefinedOutside(Comp->getOperand(1).getReg(), LoopBB))
11565 CompCounterOprNum = 2;
11566 else if (isDefinedOutside(Comp->getOperand(2).getReg(), LoopBB))
11567 CompCounterOprNum = 1;
11568 else
11569 return nullptr;
11570 }
11571 break;
11572 }
11573 }
11574 if (!Comp)
11575 return nullptr;
11576
11577 MachineInstr *Update = nullptr;
11578 Register Init;
11579 bool IsUpdatePriorComp;
11580 unsigned UpdateCounterOprNum;
11581 if (!getIndVarInfo(Comp->getOperand(CompCounterOprNum).getReg(), LoopBB,
11582 Update, UpdateCounterOprNum, Init, IsUpdatePriorComp))
11583 return nullptr;
11584
11585 return std::make_unique<AArch64PipelinerLoopInfo>(
11586 LoopBB, CondBranch, Comp, CompCounterOprNum, Update, UpdateCounterOprNum,
11587 Init, IsUpdatePriorComp, Cond);
11588}
11589
11590/// verifyInstruction - Perform target specific instruction verification.
11591bool AArch64InstrInfo::verifyInstruction(const MachineInstr &MI,
11592 StringRef &ErrInfo) const {
11593 // Verify that immediate offsets on load/store instructions are within range.
11594 // Stack objects with an FI operand are excluded as they can be fixed up
11595 // during PEI.
11596 TypeSize Scale(0U, false), Width(0U, false);
11597 int64_t MinOffset, MaxOffset;
11598 if (getMemOpInfo(MI.getOpcode(), Scale, Width, MinOffset, MaxOffset)) {
11599 unsigned ImmIdx = getLoadStoreImmIdx(MI.getOpcode());
11600 if (MI.getOperand(ImmIdx).isImm() && !MI.getOperand(ImmIdx - 1).isFI()) {
11601 int64_t Imm = MI.getOperand(ImmIdx).getImm();
11602 if (Imm < MinOffset || Imm > MaxOffset) {
11603 ErrInfo = "Unexpected immediate on load/store instruction";
11604 return false;
11605 }
11606 }
11607 }
11608
11609 const MCInstrDesc &MCID = MI.getDesc();
11610 for (unsigned Op = 0; Op < MCID.getNumOperands(); Op++) {
11611 const MachineOperand &MO = MI.getOperand(Op);
11612 switch (MCID.operands()[Op].OperandType) {
11613 case AArch64::OPERAND_IMPLICIT_IMM_0:
11614 if (!MO.isImm() || MO.getImm() != 0) {
11615 ErrInfo = "OPERAND_IMPLICIT_IMM_0 should be 0";
11616 return false;
11617 }
11618 break;
11619 case AArch64::OPERAND_SHIFT_MSL:
11620 if (!MO.isImm() ||
11621 AArch64_AM::getShiftType(MO.getImm()) != AArch64_AM::MSL ||
11622 (AArch64_AM::getShiftValue(MO.getImm()) != 8 &&
11623 AArch64_AM::getShiftValue(MO.getImm()) != 16)) {
11624 ErrInfo = "OPERAND_SHIFT_MSL should be msl shift of 8 or 16";
11625 return false;
11626 }
11627 break;
11628 default:
11629 break;
11630 }
11631 }
11632 return true;
11633}
11634
11635#define GET_INSTRINFO_HELPERS
11636#define GET_INSTRMAP_INFO
11637#include "AArch64GenInstrInfo.inc"
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineInstrBuilder MachineInstrBuilder & DefMI
static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg, unsigned NumRegs)
static cl::opt< unsigned > BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of Bcc instructions (DEBUG)"))
static Register genNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned MnegOpc, const TargetRegisterClass *RC)
genNeg - Helper to generate an intermediate negation of the second operand of Root
static bool isFrameStoreOpcode(int Opcode)
static cl::opt< unsigned > GatherOptSearchLimit("aarch64-search-limit", cl::Hidden, cl::init(2048), cl::desc("Restrict range of instructions to search for the " "machine-combiner gather pattern optimization"))
static bool getMaddPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find instructions that can be turned into madd.
static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr)
Find a condition code used by the instruction.
static MachineInstr * genFusedMultiplyAcc(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyAcc - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genFusedMultiplyAccNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static bool isCombineInstrCandidate64(unsigned Opc)
static bool isFrameLoadOpcode(int Opcode)
static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg)
static bool areCFlagsAccessedBetweenInstrs(MachineBasicBlock::iterator From, MachineBasicBlock::iterator To, const TargetRegisterInfo *TRI, const AccessKind AccessToCheck=AK_All)
True when condition flags are accessed (either by writing or reading) on the instruction trace starti...
static bool getFMAPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Floating-Point Support.
static bool isADDSRegImm(unsigned Opcode)
static bool isCheapCopy(const MachineInstr &MI, const AArch64RegisterInfo &RI)
static bool isANDOpcode(MachineInstr &MI)
static void appendOffsetComment(int NumBytes, llvm::raw_string_ostream &Comment, StringRef RegScale={})
static unsigned sForm(MachineInstr &Instr)
Get opcode of S version of Instr.
static bool isCombineInstrSettingFlag(unsigned Opc)
static bool getFNEGPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static bool getIndVarInfo(Register Reg, const MachineBasicBlock *LoopBB, MachineInstr *&UpdateInst, unsigned &UpdateCounterOprNum, Register &InitReg, bool &IsUpdatePriorComp)
If Reg is an induction variable, return true and set some parameters.
static const MachineInstrBuilder & AddSubReg(const MachineInstrBuilder &MIB, MCRegister Reg, unsigned SubIdx, unsigned State, const TargetRegisterInfo *TRI)
static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc)
static int findCondCodeUseOperandIdxForBranchOrSelect(const MachineInstr &Instr)
static bool isPostIndexLdStOpcode(unsigned Opcode)
Return true if the opcode is a post-index ld/st instruction, which really loads from base+0.
static unsigned getBranchDisplacementBits(unsigned Opc)
static cl::opt< unsigned > CBDisplacementBits("aarch64-cb-offset-bits", cl::Hidden, cl::init(9), cl::desc("Restrict range of CB instructions (DEBUG)"))
static std::optional< ParamLoadedValue > describeORRLoadedValue(const MachineInstr &MI, Register DescribedReg, const TargetInstrInfo *TII, const TargetRegisterInfo *TRI)
If the given ORR instruction is a copy, and DescribedReg overlaps with the destination register then,...
static bool getFMULPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
static void appendReadRegExpr(SmallVectorImpl< char > &Expr, unsigned RegNum)
static MachineInstr * genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR, const TargetRegisterClass *RC)
genMaddR - Generate madd instruction and combine mul and add using an extra virtual register Example ...
static Register cloneInstr(const MachineInstr *MI, unsigned ReplaceOprNum, Register ReplaceReg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertTo)
Clone an instruction from MI.
static bool scaleOffset(unsigned Opc, int64_t &Offset)
static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc)
unsigned scaledOffsetOpcode(unsigned Opcode, unsigned &Scale)
static MachineInstr * genFusedMultiplyIdx(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC)
genFusedMultiplyIdx - Helper to generate fused multiply accumulate instructions.
static MachineInstr * genIndexedMultiply(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxDupOp, unsigned MulOpc, const TargetRegisterClass *RC, MachineRegisterInfo &MRI)
Fold (FMUL x (DUP y lane)) into (FMUL_indexed x y lane)
static bool isSUBSRegImm(unsigned Opcode)
static bool UpdateOperandRegClass(MachineInstr &Instr)
static const TargetRegisterClass * getRegClass(const MachineInstr &MI, Register Reg)
static bool canCmpInstrBeRemoved(MachineInstr &MI, MachineInstr &CmpInstr, int CmpValue, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > &CCUseInstrs, bool &IsInvertCC)
unsigned unscaledOffsetOpcode(unsigned Opcode)
static bool getLoadPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Search for patterns of LD instructions we can optimize.
static bool canInstrSubstituteCmpInstr(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI)
Check if CmpInstr can be substituted by MI.
static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC)
static bool isCombineInstrCandidateFP(const MachineInstr &Inst)
static void appendLoadRegExpr(SmallVectorImpl< char > &Expr, int64_t OffsetFromDefCFA)
static void appendConstantExpr(SmallVectorImpl< char > &Expr, int64_t Constant, dwarf::LocationAtom Operation)
static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI)
Return the opcode that does not set flags when possible - otherwise return the original opcode.
static bool outliningCandidatesV8_3OpsConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool isCombineInstrCandidate32(unsigned Opc)
static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target, SmallVectorImpl< MachineOperand > &Cond)
static unsigned offsetExtendOpcode(unsigned Opcode)
MachineOutlinerMBBFlags
@ LRUnavailableSomewhere
@ UnsafeRegsDead
static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register DestReg, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
static void generateGatherLanePattern(MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned Pattern, unsigned NumLanes)
Generate optimized instruction sequence for gather load patterns to improve Memory-Level Parallelism ...
static bool getMiscPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns)
Find other MI combine patterns.
static bool outliningCandidatesSigningKeyConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool outliningCandidatesSigningScopeConsensus(const outliner::Candidate &a, const outliner::Candidate &b)
static bool shouldClusterFI(const MachineFrameInfo &MFI, int FI1, int64_t Offset1, unsigned Opcode1, int FI2, int64_t Offset2, unsigned Opcode2)
static cl::opt< unsigned > TBZDisplacementBits("aarch64-tbz-offset-bits", cl::Hidden, cl::init(14), cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"))
static void extractPhiReg(const MachineInstr &Phi, const MachineBasicBlock *MBB, Register &RegMBB, Register &RegOther)
static MCCFIInstruction createDefCFAExpression(const TargetRegisterInfo &TRI, unsigned Reg, const StackOffset &Offset)
static bool isDefinedOutside(Register Reg, const MachineBasicBlock *BB)
static MachineInstr * genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, unsigned IdxMulOpd, unsigned MaddOpc, const TargetRegisterClass *RC, FMAInstKind kind=FMAInstKind::Default, const Register *ReplacedAddend=nullptr)
genFusedMultiply - Generate fused multiply instructions.
static bool getGatherLanePattern(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, unsigned LoadLaneOpCode, unsigned NumLanes)
Check if the given instruction forms a gather load pattern that can be optimized for better Memory-Le...
static MachineInstr * genFusedMultiplyIdxNeg(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, DenseMap< Register, unsigned > &InstrIdxForVirtReg, unsigned IdxMulOpd, unsigned MaddOpc, unsigned MnegOpc, const TargetRegisterClass *RC)
genFusedMultiplyAccNeg - Helper to generate fused multiply accumulate instructions with an additional...
static bool isCombineInstrCandidate(unsigned Opc)
static unsigned regOffsetOpcode(unsigned Opcode)
MachineOutlinerClass
Constants defining how certain sequences should be outlined.
@ MachineOutlinerTailCall
Emit a save, restore, call, and return.
@ MachineOutlinerRegSave
Emit a call and tail-call.
@ MachineOutlinerNoLRSave
Only emit a branch.
@ MachineOutlinerThunk
Emit a call and return.
@ MachineOutlinerDefault
static cl::opt< unsigned > BDisplacementBits("aarch64-b-offset-bits", cl::Hidden, cl::init(26), cl::desc("Restrict range of B instructions (DEBUG)"))
static bool areCFlagsAliveInSuccessors(const MachineBasicBlock *MBB)
Check if AArch64::NZCV should be alive in successors of MBB.
static void emitFrameOffsetAdj(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, int64_t Offset, unsigned Opc, const TargetInstrInfo *TII, MachineInstr::MIFlag Flag, bool NeedsWinCFI, bool *HasWinCFI, bool EmitCFAOffset, StackOffset CFAOffset, unsigned FrameReg)
static bool isCheapImmediate(const MachineInstr &MI, unsigned BitSize)
static cl::opt< unsigned > CBZDisplacementBits("aarch64-cbz-offset-bits", cl::Hidden, cl::init(19), cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"))
static void genSubAdd2SubSub(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, unsigned IdxOpd1, DenseMap< Register, unsigned > &InstrIdxForVirtReg)
Do the following transformation A - (B + C) ==> (A - B) - C A - (B + C) ==> (A - C) - B.
static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg, unsigned *NewReg=nullptr)
static void signOutlinedFunction(MachineFunction &MF, MachineBasicBlock &MBB, const AArch64InstrInfo *TII, bool ShouldSignReturnAddr)
static MachineInstr * genFNegatedMAD(MachineFunction &MF, MachineRegisterInfo &MRI, const TargetInstrInfo *TII, MachineInstr &Root, SmallVectorImpl< MachineInstr * > &InsInstrs)
static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO, unsigned MulOpc, unsigned ZeroReg)
static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertBefore, const MCInstrDesc &MCID, Register SrcReg, bool IsKill, unsigned SubIdx0, unsigned SubIdx1, int FI, MachineMemOperand *MMO)
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static const Function * getParent(const Value *V)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
DXIL Forward Handle Accesses
@ Default
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
MachineInstr unsigned OpIdx
ConstantRange Range(APInt(BitWidth, Low), APInt(BitWidth, High))
PowerPC Reduce CR logical Operation
const SmallVectorImpl< MachineOperand > MachineBasicBlock * TBB
const SmallVectorImpl< MachineOperand > & Cond
This file declares the machine register scavenger class.
This file contains some templates that are useful if you are working with the STL at all.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:480
This file defines the SmallSet class.
This file defines the SmallVector class.
static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO, unsigned CombineOpc=0)
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
SignReturnAddress getSignReturnAddressCondition() const
void setOutliningStyle(const std::string &Style)
std::optional< bool > hasRedZone() const
static bool shouldSignReturnAddress(SignReturnAddress Condition, bool IsLRSpilled)
static bool isHForm(const MachineInstr &MI)
Returns whether the instruction is in H form (16 bit operands)
void insertSelect(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, Register DstReg, ArrayRef< MachineOperand > Cond, Register TrueReg, Register FalseReg) const override
static bool hasBTISemantics(const MachineInstr &MI)
Returns whether the instruction can be compatible with non-zero BTYPE.
static bool isQForm(const MachineInstr &MI)
Returns whether the instruction is in Q form (128 bit operands)
static bool getMemOpInfo(unsigned Opcode, TypeSize &Scale, TypeSize &Width, int64_t &MinOffset, int64_t &MaxOffset)
Returns true if opcode Opc is a memory operation.
static bool isTailCallReturnInst(const MachineInstr &MI)
Returns true if MI is one of the TCRETURN* instructions.
static bool isFPRCopy(const MachineInstr &MI)
Does this instruction rename an FPR without modifying bits?
MachineInstr * emitLdStWithAddr(MachineInstr &MemI, const ExtAddrMode &AM) const override
std::optional< DestSourcePair > isCopyInstrImpl(const MachineInstr &MI) const override
If the specific machine instruction is an instruction that moves/copies value from one register to an...
MachineBasicBlock * getBranchDestBlock(const MachineInstr &MI) const override
unsigned getInstSizeInBytes(const MachineInstr &MI) const override
GetInstSize - Return the number of bytes of code the specified instruction may be.
bool areMemAccessesTriviallyDisjoint(const MachineInstr &MIa, const MachineInstr &MIb) const override
void copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, Register DestReg, Register SrcReg, bool KillSrc, bool RenamableDest=false, bool RenamableSrc=false) const override
static bool isGPRCopy(const MachineInstr &MI)
Does this instruction rename a GPR without modifying bits?
static unsigned convertToFlagSettingOpc(unsigned Opc)
Return the opcode that set flags when possible.
bool isBranchOffsetInRange(unsigned BranchOpc, int64_t BrOffset) const override
bool canInsertSelect(const MachineBasicBlock &, ArrayRef< MachineOperand > Cond, Register, Register, Register, int &, int &, int &) const override
Register isLoadFromStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
Check for post-frame ptr elimination stack locations as well.
static const MachineOperand & getLdStOffsetOp(const MachineInstr &MI)
Returns the immediate offset operator of a load/store.
bool isCoalescableExtInstr(const MachineInstr &MI, Register &SrcReg, Register &DstReg, unsigned &SubIdx) const override
static std::optional< unsigned > getUnscaledLdSt(unsigned Opc)
Returns the unscaled load/store for the scaled load/store opcode, if there is a corresponding unscale...
static bool hasUnscaledLdStOffset(unsigned Opc)
Return true if it has an unscaled load/store offset.
static const MachineOperand & getLdStAmountOp(const MachineInstr &MI)
Returns the shift amount operator of a load/store.
static bool isPreLdSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load/store.
std::optional< ExtAddrMode > getAddrModeFromMemoryOp(const MachineInstr &MemI, const TargetRegisterInfo *TRI) const override
bool getMemOperandsWithOffsetWidth(const MachineInstr &MI, SmallVectorImpl< const MachineOperand * > &BaseOps, int64_t &Offset, bool &OffsetIsScalable, LocationSize &Width, const TargetRegisterInfo *TRI) const override
bool analyzeBranchPredicate(MachineBasicBlock &MBB, MachineBranchPredicate &MBP, bool AllowModify) const override
void insertIndirectBranch(MachineBasicBlock &MBB, MachineBasicBlock &NewDestBB, MachineBasicBlock &RestoreBB, const DebugLoc &DL, int64_t BrOffset, RegScavenger *RS) const override
void loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register DestReg, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
static bool isPairableLdStInst(const MachineInstr &MI)
Return true if the given load or store may be paired with another.
void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, Register SrcReg, bool isKill, int FrameIndex, const TargetRegisterClass *RC, Register VReg, MachineInstr::MIFlag Flags=MachineInstr::NoFlags) const override
const AArch64RegisterInfo & getRegisterInfo() const
getRegisterInfo - TargetInstrInfo is a superset of MRegister info.
static bool isPreSt(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed store.
MachineInstr * foldMemoryOperandImpl(MachineFunction &MF, MachineInstr &MI, ArrayRef< unsigned > Ops, MachineBasicBlock::iterator InsertPt, int FrameIndex, LiveIntervals *LIS=nullptr, VirtRegMap *VRM=nullptr) const override
void insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const override
AArch64InstrInfo(const AArch64Subtarget &STI)
static bool isPairedLdSt(const MachineInstr &MI)
Returns whether the instruction is a paired load/store.
bool getMemOperandWithOffsetWidth(const MachineInstr &MI, const MachineOperand *&BaseOp, int64_t &Offset, bool &OffsetIsScalable, TypeSize &Width, const TargetRegisterInfo *TRI) const
If OffsetIsScalable is set to 'true', the offset is scaled by vscale.
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
bool reverseBranchCondition(SmallVectorImpl< MachineOperand > &Cond) const override
static bool isStridedAccess(const MachineInstr &MI)
Return true if the given load or store is a strided memory access.
bool shouldClusterMemOps(ArrayRef< const MachineOperand * > BaseOps1, int64_t Offset1, bool OffsetIsScalable1, ArrayRef< const MachineOperand * > BaseOps2, int64_t Offset2, bool OffsetIsScalable2, unsigned ClusterSize, unsigned NumBytes) const override
Detect opportunities for ldp/stp formation.
unsigned removeBranch(MachineBasicBlock &MBB, int *BytesRemoved=nullptr) const override
bool isThroughputPattern(unsigned Pattern) const override
Return true when a code sequence can improve throughput.
MachineOperand & getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const
Return the immediate offset of the base register in a load/store LdSt.
bool analyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl< MachineOperand > &Cond, bool AllowModify=false) const override
bool canFoldIntoAddrMode(const MachineInstr &MemI, Register Reg, const MachineInstr &AddrI, ExtAddrMode &AM) const override
static bool isLdStPairSuppressed(const MachineInstr &MI)
Return true if pairing the given load or store is hinted to be unprofitable.
Register isStoreToStackSlotPostFE(const MachineInstr &MI, int &FrameIndex) const override
Check for post-frame ptr elimination stack locations as well.
std::unique_ptr< TargetInstrInfo::PipelinerLoopInfo > analyzeLoopForPipelining(MachineBasicBlock *LoopBB) const override
bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const override
unsigned insertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB, ArrayRef< MachineOperand > Cond, const DebugLoc &DL, int *BytesAdded=nullptr) const override
MachineBasicBlock::iterator probedStackAlloc(MachineBasicBlock::iterator MBBI, Register TargetReg, bool FrameSetup) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
bool optimizeCompareInstr(MachineInstr &CmpInstr, Register SrcReg, Register SrcReg2, int64_t CmpMask, int64_t CmpValue, const MachineRegisterInfo *MRI) const override
optimizeCompareInstr - Convert the instruction supplying the argument to the comparison into one that sets the zero bit in the flags register.
static unsigned getLoadStoreImmIdx(unsigned Opc)
Returns the index for the immediate for a given instruction.
static bool isGPRZero(const MachineInstr &MI)
Does this instruction set its full destination register to zero?
void copyGPRRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, unsigned ZeroReg, llvm::ArrayRef< unsigned > Indices) const
bool analyzeCompare(const MachineInstr &MI, Register &SrcReg, Register &SrcReg2, int64_t &CmpMask, int64_t &CmpValue) const override
analyzeCompare - For a comparison instruction, return the source registers in SrcReg and SrcReg2, and the value it compares against in CmpValue.
CombinerObjective getCombinerObjective(unsigned Pattern) const override
static bool isFpOrNEON(Register Reg)
Returns whether the physical register is FP or NEON.
bool isAsCheapAsAMove(const MachineInstr &MI) const override
std::optional< DestSourcePair > isCopyLikeInstrImpl(const MachineInstr &MI) const override
static void suppressLdStPair(MachineInstr &MI)
Hint that pairing the given load or store is unprofitable.
Register isStoreToStackSlot(const MachineInstr &MI, int &FrameIndex) const override
static bool isPreLd(const MachineInstr &MI)
Returns whether the instruction is a pre-indexed load.
void copyPhysRegTuple(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL, MCRegister DestReg, MCRegister SrcReg, bool KillSrc, unsigned Opcode, llvm::ArrayRef< unsigned > Indices) const
bool optimizeCondBranch(MachineInstr &MI) const override
Replace a csinc-branch sequence with a simple conditional branch.
static int getMemScale(unsigned Opc)
Scaling factor for (scaled or unscaled) load or store.
bool isCandidateToMergeOrPair(const MachineInstr &MI) const
Return true if this is a load/store that can be potentially paired/merged.
MCInst getNop() const override
static const MachineOperand & getLdStBaseOp(const MachineInstr &MI)
Returns the base register operand of a load/store.
bool isReservedReg(const MachineFunction &MF, MCRegister Reg) const
const AArch64RegisterInfo * getRegisterInfo() const override
bool isNeonAvailable() const
Returns true if the target has NEON and the function at runtime is known to have NEON enabled (e....
bool isSVEorStreamingSVEAvailable() const
Returns true if the target has access to either the full range of SVE instructions, or the streaming-compatible subset of SVE instructions.
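The usage sketch promised above: a minimal, hypothetical query of the load/store addressing limits via AArch64InstrInfo::getMemOpInfo, assuming the AArch64 backend headers are on the include path and using AArch64::LDRXui purely as an example opcode.

  #include "AArch64InstrInfo.h"
  using namespace llvm;

  // Sketch: ask for the scale, access width and immediate-offset range of a
  // scaled 64-bit load. For LDRXui the offset is scaled by 8 and encoded in an
  // unsigned 12-bit field, so MinOffset/MaxOffset come back as 0..4095 in
  // units of Scale.
  void queryLdrLimits() {
    TypeSize Scale = TypeSize::getFixed(0), Width = TypeSize::getFixed(0);
    int64_t MinOffset = 0, MaxOffset = 0;
    if (AArch64InstrInfo::getMemOpInfo(AArch64::LDRXui, Scale, Width,
                                       MinOffset, MaxOffset)) {
      // Reachable byte offsets are [MinOffset, MaxOffset] * Scale.
    }
  }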
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory), i.e. a start pointer and a length.
Definition ArrayRef.h:40
const T & front() const
front - Get the first element.
Definition ArrayRef.h:145
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
This is an important base class in LLVM.
Definition Constant.h:43
A debug info location.
Definition DebugLoc.h:124
bool empty() const
Definition DenseMap.h:109
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:233
bool hasOptSize() const
Optimize this function for size (-Os) or minimum size (-Oz).
Definition Function.h:706
bool hasMinSize() const
Optimize this function for minimum size (-Oz).
Definition Function.h:703
Module * getParent()
Get the module that this global value is contained inside of...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
static LocationSize precise(uint64_t Value)
This class is intended to be used as a base class for asm properties and features specific to the target.
Definition MCAsmInfo.h:64
bool usesWindowsCFI() const
Definition MCAsmInfo.h:652
static MCCFIInstruction cfiDefCfa(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa defines a rule for computing CFA as: take address from Register and add Offset to it.
Definition MCDwarf.h:585
static MCCFIInstruction createOffset(MCSymbol *L, unsigned Register, int64_t Offset, SMLoc Loc={})
.cfi_offset Previous value of Register is saved at offset Offset from CFA.
Definition MCDwarf.h:627
static MCCFIInstruction cfiDefCfaOffset(MCSymbol *L, int64_t Offset, SMLoc Loc={})
.cfi_def_cfa_offset modifies a rule for computing CFA.
Definition MCDwarf.h:600
static MCCFIInstruction createEscape(MCSymbol *L, StringRef Vals, SMLoc Loc={}, StringRef Comment="")
.cfi_escape Allows the user to add arbitrary bytes to the unwind info.
Definition MCDwarf.h:697
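These MCCFIInstruction factories are normally paired with MachineFunction::addFrameInst and the target-independent CFI_INSTRUCTION pseudo (see the MachineInstrBuilder entries below); a minimal sketch of that standard pattern, with the block, iterator, DebugLoc and TargetInstrInfo assumed to be supplied by the caller:

  #include "llvm/CodeGen/MachineFunction.h"
  #include "llvm/CodeGen/MachineInstrBuilder.h"
  #include "llvm/MC/MCDwarf.h"

  // Record ".cfi_def_cfa_offset 16" and attach it to the block as a
  // CFI_INSTRUCTION pseudo flagged as frame setup.
  static void emitDefCfaOffset16(llvm::MachineFunction &MF,
                                 llvm::MachineBasicBlock &MBB,
                                 llvm::MachineBasicBlock::iterator MBBI,
                                 const llvm::DebugLoc &DL,
                                 const llvm::TargetInstrInfo *TII) {
    unsigned CFIIndex =
        MF.addFrameInst(llvm::MCCFIInstruction::cfiDefCfaOffset(nullptr, 16));
    llvm::BuildMI(MBB, MBBI, DL, TII->get(llvm::TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex)
        .setMIFlags(llvm::MachineInstr::FrameSetup);
  }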
MCInstBuilder & addImm(int64_t Val)
Add a new integer immediate operand.
Instances of this class represent a single low-level machine instruction.
Definition MCInst.h:188
Describe properties that are true of each instruction in the target description file.
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
ArrayRef< MCOperandInfo > operands() const
Wrapper class representing physical registers. Should be passed by value.
Definition MCRegister.h:41
constexpr bool isValid() const
Definition MCRegister.h:84
static constexpr unsigned NoRegister
Definition MCRegister.h:60
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1569
Set of metadata that should be preserved when using BuildMI().
bool isInlineAsmBrIndirectTarget() const
Returns true if this is the indirect dest of an INLINEASM_BR.
LLVM_ABI void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and updates PHI operands in the successor blocks that refer to FromMBB to refer to this block.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
reverse_instr_iterator instr_rbegin()
LLVM_ABI iterator getFirstTerminator()
Returns an iterator to the first terminator instruction of this basic block.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
reverse_instr_iterator instr_rend()
Instructions::iterator instr_iterator
Instructions::const_iterator const_instr_iterator
void addLiveIn(MCRegister PhysReg, LaneBitmask LaneMask=LaneBitmask::getAll())
Adds the specified register as a live in.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
LLVM_ABI instr_iterator erase(instr_iterator I)
Remove an instruction from the instruction list and delete it.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before 'Where'.
MachineInstrBundleIterator< MachineInstr > iterator
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
LLVM_ABI bool isLiveIn(MCRegister Reg, LaneBitmask LaneMask=LaneBitmask::getAll()) const
Return true if the specified register is in the live in set.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
uint64_t getStackSize() const
Return the number of bytes that must be allocated to hold all of the fixed size frame objects.
void setStackID(int ObjectIdx, uint8_t ID)
bool isCalleeSavedInfoValid() const
Has the callee saved info been calculated yet?
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
unsigned getNumObjects() const
Return the number of objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
unsigned addFrameInst(const MCCFIInstruction &Inst)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
BasicBlockListType::iterator iterator
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineJumpTableInfo * getJumpTableInfo() const
getJumpTableInfo - Return the jump table info object for the current function.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addCFIIndex(unsigned CFIIndex) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addSym(MCSymbol *Sym, unsigned char TargetFlags=0) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
const MachineInstrBuilder & addMemOperand(MachineMemOperand *MMO) const
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
reverse_iterator getReverse() const
Get a reverse iterator to the same node.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
bool mayLoadOrStore(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read or modify memory.
bool isCopy() const
const MachineBasicBlock * getParent() const
bool isCall(QueryType Type=AnyInBundle) const
bool getFlag(MIFlag Flag) const
Return whether an MI flag is set.
LLVM_ABI uint32_t mergeFlagsWith(const MachineInstr &Other) const
Return the MIFlags which represent both MachineInstrs.
unsigned getNumOperands() const
Returns the total number of operands.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
bool mayLoad(QueryType Type=AnyInBundle) const
Return true if this instruction could possibly read memory.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
LLVM_ABI bool hasUnmodeledSideEffects() const
Return true if this instruction has side effects that are not modeled by mayLoad / mayStore, etc.
bool registerDefIsDead(Register Reg, const TargetRegisterInfo *TRI) const
Returns true if the register is dead in this machine instruction.
bool definesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr fully defines the specified register.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
LLVM_ABI bool hasOrderedMemoryRef() const
Return true if this instruction may have an ordered or volatile memory reference, or if the information describing the memory reference is not available.
LLVM_ABI const MachineFunction * getMF() const
Return the function that contains the basic block that this instruction belongs to.
ArrayRef< MachineMemOperand * > memoperands() const
Access to memory operands of the instruction.
LLVM_ABI bool isLoadFoldBarrier() const
Returns true if it is illegal to fold a load across this instruction.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
LLVM_ABI void addRegisterDefined(Register Reg, const TargetRegisterInfo *RegInfo=nullptr)
We have determined MI defines a register.
const MachineOperand & getOperand(unsigned i) const
uint32_t getFlags() const
Return the MI flags bitvector.
LLVM_ABI int findRegisterDefOperandIdx(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false) const
Returns the operand index that is a def of the specified register or -1 if it is not found.
const std::vector< MachineJumpTableEntry > & getJumpTables() const
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
This class contains meta information specific to a module.
LLVM_ABI MachineFunction * getMachineFunction(const Function &F) const
Returns the MachineFunction associated to IR function F if there is one, otherwise nullptr.
MachineOperand class - Representation of each machine instruction operand.
void setSubReg(unsigned subReg)
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
MachineBasicBlock * getMBB() const
void setIsDead(bool Val=true)
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setIsKill(bool Val=true)
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
unsigned getTargetFlags() const
static MachineOperand CreateImm(int64_t Val)
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
LLVM_ABI bool isIdenticalTo(const MachineOperand &Other) const
Returns true if this operand is identical to the specified operand except for liveness related flags (isKill, isUndef and isDead).
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool tracksLiveness() const
tracksLiveness - Returns true when tracking register liveness accurately.
bool reservedRegsFrozen() const
reservedRegsFrozen - Returns true after freezeReservedRegs() was called to ensure the set of reserved registers stays constant.
const TargetRegisterClass * getRegClassOrNull(Register Reg) const
Return the register class of Reg, or null if Reg has not been assigned a register class yet.
LLVM_ABI const TargetRegisterClass * constrainRegClass(Register Reg, const TargetRegisterClass *RC, unsigned MinNumRegs=0)
constrainRegClass - Constrain the register class of the specified virtual register to be a common subclass of RC and the current register class.
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register, or null if none is found.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
MI-level patchpoint operands.
Definition StackMaps.h:77
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given patchpoint should emit.
Definition StackMaps.h:105
Wrapper class representing virtual and physical registers.
Definition Register.h:20
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition Register.h:107
constexpr bool isValid() const
Definition Register.h:112
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
static constexpr bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:66
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
Represents a location in source code.
Definition SMLoc.h:22
bool erase(PtrType Ptr)
Remove pointer from the set.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less than N).
Definition SmallSet.h:133
bool empty() const
Definition SmallSet.h:168
bool erase(const T &V)
Definition SmallSet.h:199
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(StringRef RHS)
Append from a StringRef.
Definition SmallString.h:68
StringRef str() const
Explicit conversion to StringRef.
This class consists of common code factored out of the SmallVector class to reduce code duplication based on the SmallVector 'N' template parameter.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
MI-level stackmap operands.
Definition StackMaps.h:36
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given stackmap should emit.
Definition StackMaps.h:51
StackOffset holds a fixed and a scalable offset in bytes.
Definition TypeSize.h:30
int64_t getFixed() const
Returns the fixed component of the stack.
Definition TypeSize.h:46
int64_t getScalable() const
Returns the scalable component of the stack.
Definition TypeSize.h:49
static StackOffset get(int64_t Fixed, int64_t Scalable)
Definition TypeSize.h:41
static StackOffset getScalable(int64_t Scalable)
Definition TypeSize.h:40
static StackOffset getFixed(int64_t Fixed)
Definition TypeSize.h:39
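StackOffset pairs a fixed byte count with a vscale-scaled one; a small sketch of how the factories and accessors above compose (the values are arbitrary, chosen only for illustration):

  #include "llvm/Support/TypeSize.h"

  void stackOffsetSketch() {
    using llvm::StackOffset;
    // 32 fixed bytes plus 16 bytes scaled by vscale (e.g. an SVE save area).
    StackOffset SVEArea = StackOffset::get(/*Fixed=*/32, /*Scalable=*/16);
    StackOffset Total = SVEArea + StackOffset::getFixed(8);
    int64_t FixedPart = Total.getFixed();       // 40
    int64_t ScalablePart = Total.getScalable(); // 16
    (void)FixedPart;
    (void)ScalablePart;
  }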
MI-level Statepoint operands.
Definition StackMaps.h:159
uint32_t getNumPatchBytes() const
Return the number of patchable bytes the given statepoint should emit.
Definition StackMaps.h:208
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition StringRef.h:55
Object returned by analyzeLoopForPipelining.
TargetInstrInfo - Interface to description of machine instruction set.
virtual void genAlternativeCodeSequence(MachineInstr &Root, unsigned Pattern, SmallVectorImpl< MachineInstr * > &InsInstrs, SmallVectorImpl< MachineInstr * > &DelInstrs, DenseMap< Register, unsigned > &InstIdxForVirtReg) const
When getMachineCombinerPatterns() finds patterns, this function generates the instructions that could replace the original code sequence.
virtual std::optional< ParamLoadedValue > describeLoadedValue(const MachineInstr &MI, Register Reg) const
Produce the expression describing the MI loading a value into the physical register Reg.
virtual bool getMachineCombinerPatterns(MachineInstr &Root, SmallVectorImpl< unsigned > &Patterns, bool DoRegPressureReduce) const
Return true when there is potentially a faster code sequence for an instruction chain ending in Root.
virtual bool isSchedulingBoundary(const MachineInstr &MI, const MachineBasicBlock *MBB, const MachineFunction &MF) const
Test if the given instruction should be considered a scheduling boundary.
virtual CombinerObjective getCombinerObjective(unsigned Pattern) const
Return the objective of a combiner pattern.
virtual bool isFunctionSafeToSplit(const MachineFunction &MF) const
Return true if the function is a viable candidate for machine function splitting.
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
const MCAsmInfo * getMCAsmInfo() const
Return target specific asm information.
bool contains(Register Reg) const
Return true if the specified register is included in this register class.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDesc objects that represent all of the machine registers that the target has.
TargetSubtargetInfo - Generic base class for all target subtargets.
virtual const TargetInstrInfo * getInstrInfo() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
Target - Wrapper for Target specific information.
static constexpr TypeSize getFixed(ScalarTy ExactSize)
Definition TypeSize.h:343
static constexpr TypeSize getScalable(ScalarTy MinimumSize)
Definition TypeSize.h:346
Value * getOperand(unsigned i) const
Definition User.h:232
constexpr bool isScalable() const
Returns whether the quantity is scaled by a runtime quantity (vscale).
Definition TypeSize.h:168
constexpr ScalarTy getKnownMinValue() const
Returns the minimum value this quantity can represent.
Definition TypeSize.h:165
self_iterator getIterator()
Definition ilist_node.h:123
A raw_ostream that writes to an std::string.
CallInst * Call
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
static CondCode getInvertedCondCode(CondCode Code)
@ MO_DLLIMPORT
MO_DLLIMPORT - On a symbol operand, this represents that the reference to the symbol is for an import...
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_G1
MO_G1 - A symbol operand with this flag (granule 1) represents the bits 16-31 of a 64-bit address,...
@ MO_S
MO_S - Indicates that the bits of the symbol operand represented by MO_G0 etc are signed.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_GOT
MO_GOT - This flag indicates that a symbol operand represents the address of the GOT entry for the symbol, rather than the address of the symbol itself.
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_G0
MO_G0 - A symbol operand with this flag (granule 0) represents the bits 0-15 of a 64-bit address,...
@ MO_ARM64EC_CALLMANGLE
MO_ARM64EC_CALLMANGLE - Operand refers to the Arm64EC-mangled version of a symbol,...
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing the symbol.
@ MO_HI12
MO_HI12 - This flag indicates that a symbol operand represents the bits 13-24 of a 64-bit address,...
@ MO_TLS
MO_TLS - Indicates that the operand being accessed is some kind of thread-local symbol.
@ MO_G2
MO_G2 - A symbol operand with this flag (granule 2) represents the bits 32-47 of a 64-bit address,...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
@ MO_COFFSTUB
MO_COFFSTUB - On a symbol operand "FOO", this indicates that the reference is actually to the "....
unsigned getCheckerSizeInBytes(AuthCheckMethod Method)
Returns the number of bytes added by checkAuthenticatedRegister.
static uint64_t decodeLogicalImmediate(uint64_t val, unsigned regSize)
decodeLogicalImmediate - Decode a logical immediate value in the form "N:immr:imms" (where the immr and imms fields are each 6 bits) into the integer value it represents with regSize bits.
static unsigned getShiftValue(unsigned Imm)
getShiftValue - Extract the shift value.
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm is a 3-bit shift amount; the extend field selects uxtb, uxth, uxtw, uxtx, sxtb, sxth, sxtw, or sxtx.
static unsigned getArithShiftValue(unsigned Imm)
getArithShiftValue - get the arithmetic shift value.
static uint64_t encodeLogicalImmediate(uint64_t imm, unsigned regSize)
encodeLogicalImmediate - Return the encoded immediate value for a logical immediate instruction of the given register size.
static AArch64_AM::ShiftExtendType getExtendType(unsigned Imm)
getExtendType - Extract the extend type for operands of arithmetic ops.
static AArch64_AM::ShiftExtendType getArithExtendType(unsigned Imm)
static AArch64_AM::ShiftExtendType getShiftType(unsigned Imm)
getShiftType - Extract the shift type.
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm is a 6-bit shift amount; the shifter field selects lsl, lsr, asr, ror, or msl.
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to synthesize the immediate.
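A short sketch of the immediate-encoding helpers above, assuming MCTargetDesc/AArch64AddressingModes.h is available; the constants are chosen only for illustration:

  #include "MCTargetDesc/AArch64AddressingModes.h"

  void encodingSketch() {
    using namespace llvm;
    // Shifted-register operand: encode LSL #12, then pull the pieces back out.
    unsigned ShiftImm = AArch64_AM::getShifterImm(AArch64_AM::LSL, 12);
    unsigned Amount = AArch64_AM::getShiftValue(ShiftImm);                 // 12
    AArch64_AM::ShiftExtendType Kind = AArch64_AM::getShiftType(ShiftImm); // LSL

    // Logical immediate: 0x00ff00ff00ff00ff is a repeating 16-bit pattern and
    // therefore representable; it round-trips through encode/decode.
    uint64_t Enc = AArch64_AM::encodeLogicalImmediate(0x00ff00ff00ff00ffULL, 64);
    uint64_t Back = AArch64_AM::decodeLogicalImmediate(Enc, 64); // same value
    (void)Amount;
    (void)Kind;
    (void)Back;
  }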
static const uint64_t InstrFlagIsWhile
static const uint64_t InstrFlagIsPTestLike
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Dead
Unused definition.
@ Renamable
Register that may be renamed.
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
initializer< Ty > init(const Ty &Val)
constexpr double e
InstrType
Represents how an instruction should be mapped by the outliner.
LLVM_ABI Instruction & back() const
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
bool all_of(R &&range, UnaryPredicate P)
Provide wrappers to std::all_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1725
static bool isCondBranchOpcode(int Opc)
MCCFIInstruction createDefCFA(const TargetRegisterInfo &TRI, unsigned FrameReg, unsigned Reg, const StackOffset &Offset, bool LastAdjustmentWasScalable=true)
static bool isPTrueOpcode(unsigned Opc)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
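BuildMI together with the MachineInstrBuilder chaining methods listed earlier is how such instructions are created in this file; a minimal sketch using AArch64::ADDXri purely as an example opcode (operands: destination, source, 12-bit immediate, shift):

  #include "AArch64InstrInfo.h"
  #include "llvm/CodeGen/MachineInstrBuilder.h"

  // Dst = Src + 16 (immediate form, no shift).
  void buildAddSketch(llvm::MachineBasicBlock &MBB,
                      llvm::MachineBasicBlock::iterator MBBI,
                      const llvm::DebugLoc &DL,
                      const llvm::TargetInstrInfo *TII,
                      llvm::Register Dst, llvm::Register Src) {
    llvm::BuildMI(MBB, MBBI, DL, TII->get(llvm::AArch64::ADDXri), Dst)
        .addReg(Src)
        .addImm(16)
        .addImm(0);
  }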
bool succeeded(LogicalResult Result)
Utility function that returns true if the provided LogicalResult corresponds to a success value.
int isAArch64FrameOffsetLegal(const MachineInstr &MI, StackOffset &Offset, bool *OutUseUnscaledOp=nullptr, unsigned *OutUnscaledOp=nullptr, int64_t *EmittableOffset=nullptr)
Check if the Offset is a valid frame offset for MI.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A, B,...
Definition STLExtras.h:2472
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
static bool isIndirectBranchOpcode(int Opc)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
unsigned getBLRCallOpcode(const MachineFunction &MF)
Return opcode to be used for indirect calls.
@ AArch64FrameOffsetIsLegal
Offset is legal.
@ AArch64FrameOffsetCanUpdate
Offset can apply, at least partly.
@ AArch64FrameOffsetCannotUpdate
Offset cannot apply.
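The three status flags above are returned, possibly OR'ed together, by isAArch64FrameOffsetLegal; a sketch of the usual check, with MI and the StackOffset being folded assumed to be owned by the caller:

  #include "AArch64InstrInfo.h"

  void checkFrameOffsetSketch(llvm::MachineInstr &MI,
                              llvm::StackOffset &Offset) {
    bool UseUnscaledOp = false;
    unsigned UnscaledOp = 0;
    int64_t Emittable = 0;
    int Status = llvm::isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
                                                 &UnscaledOp, &Emittable);
    if (Status & llvm::AArch64FrameOffsetIsLegal) {
      // Emittable is the immediate that can be encoded directly in MI
      // (using UnscaledOp instead of the current opcode if UseUnscaledOp).
    }
  }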
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:284
Op::Description Desc
unsigned Log2_64(uint64_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition MathExtras.h:337
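The MathExtras helpers referenced on this page (isInt and isPowerOf2_64/Log2_64 here, isIntN and SignExtend64 further below) are small value-range utilities; a quick sketch:

  #include "llvm/Support/MathExtras.h"
  #include <cassert>

  void mathExtrasSketch() {
    using namespace llvm;
    assert(isInt<12>(2047) && !isInt<12>(4096)); // signed 12-bit range check
    assert(isIntN(9, -256) && !isIntN(9, 256));  // dynamic-width variant
    assert(isPowerOf2_64(4096) && Log2_64(4096) == 12);
    assert(SignExtend64<9>(0x1FF) == -1);        // sign-extend the low 9 bits
  }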
static bool isSEHInstruction(const MachineInstr &MI)
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1732
auto reverse(ContainerTy &&C)
Definition STLExtras.h:406
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1622
AArch64MachineCombinerPattern
@ MULSUBv8i16_OP2
@ FMULv4i16_indexed_OP1
@ FMLSv1i32_indexed_OP2
@ MULSUBv2i32_indexed_OP1
@ FMLAv2i32_indexed_OP2
@ MULADDv4i16_indexed_OP2
@ FMLAv1i64_indexed_OP1
@ MULSUBv16i8_OP1
@ FMLAv8i16_indexed_OP2
@ FMULv2i32_indexed_OP1
@ MULSUBv8i16_indexed_OP2
@ FMLAv1i64_indexed_OP2
@ MULSUBv4i16_indexed_OP2
@ FMLAv1i32_indexed_OP1
@ FMLAv2i64_indexed_OP2
@ FMLSv8i16_indexed_OP1
@ MULSUBv2i32_OP1
@ FMULv4i16_indexed_OP2
@ MULSUBv4i32_indexed_OP2
@ FMULv2i64_indexed_OP2
@ FMLAv4i32_indexed_OP1
@ MULADDv4i16_OP2
@ FMULv8i16_indexed_OP2
@ MULSUBv4i16_OP1
@ MULADDv4i32_OP2
@ MULADDv2i32_OP2
@ MULADDv16i8_OP2
@ FMLSv4i16_indexed_OP1
@ MULADDv16i8_OP1
@ FMLAv2i64_indexed_OP1
@ FMLAv1i32_indexed_OP2
@ FMLSv2i64_indexed_OP2
@ MULADDv2i32_OP1
@ MULADDv4i32_OP1
@ MULADDv2i32_indexed_OP1
@ MULSUBv16i8_OP2
@ MULADDv4i32_indexed_OP1
@ MULADDv2i32_indexed_OP2
@ FMLAv4i16_indexed_OP2
@ MULSUBv8i16_OP1
@ FMULv2i32_indexed_OP2
@ FMLSv2i32_indexed_OP2
@ FMLSv4i32_indexed_OP1
@ FMULv2i64_indexed_OP1
@ MULSUBv4i16_OP2
@ FMLSv4i16_indexed_OP2
@ FMLAv2i32_indexed_OP1
@ FMLSv2i32_indexed_OP1
@ FMLAv8i16_indexed_OP1
@ MULSUBv4i16_indexed_OP1
@ FMLSv4i32_indexed_OP2
@ MULADDv4i32_indexed_OP2
@ MULSUBv4i32_OP2
@ MULSUBv8i16_indexed_OP1
@ MULADDv8i16_OP2
@ MULSUBv2i32_indexed_OP2
@ FMULv4i32_indexed_OP2
@ FMLSv2i64_indexed_OP1
@ MULADDv4i16_OP1
@ FMLAv4i32_indexed_OP2
@ MULADDv8i16_indexed_OP1
@ FMULv4i32_indexed_OP1
@ FMLAv4i16_indexed_OP1
@ FMULv8i16_indexed_OP1
@ MULADDv8i16_OP1
@ MULSUBv4i32_indexed_OP1
@ MULSUBv4i32_OP1
@ FMLSv8i16_indexed_OP2
@ MULADDv8i16_indexed_OP2
@ MULSUBv2i32_OP2
@ FMLSv1i64_indexed_OP2
@ MULADDv4i16_indexed_OP1
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag=MachineInstr::NoFlags, bool SetNZCV=false, bool NeedsWinCFI=false, bool *HasWinCFI=nullptr, bool EmitCFAOffset=false, StackOffset InitialOffset={}, unsigned FrameReg=AArch64::SP)
emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg plus Offset.
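emitFrameOffset is the backend's workhorse for SP arithmetic, including scalable (SVE) components; a sketch of a 16-byte fixed allocation in a prologue, following the declaration above:

  #include "AArch64InstrInfo.h"

  void allocateStackSketch(llvm::MachineBasicBlock &MBB,
                           llvm::MachineBasicBlock::iterator MBBI,
                           const llvm::DebugLoc &DL,
                           const llvm::TargetInstrInfo *TII) {
    // SP = SP - 16, tagged as frame setup so later unwind/CFI bookkeeping
    // treats it as prologue code.
    llvm::emitFrameOffset(MBB, MBBI, DL, llvm::AArch64::SP, llvm::AArch64::SP,
                          llvm::StackOffset::getFixed(-16), TII,
                          llvm::MachineInstr::FrameSetup);
  }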
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
CombinerObjective
The combiner's goal may differ based on which pattern it is attempting to optimize.
std::optional< UsedNZCV > examineCFlagsUse(MachineInstr &MI, MachineInstr &CmpInstr, const TargetRegisterInfo &TRI, SmallVectorImpl< MachineInstr * > *CCUseInstrs=nullptr)
CodeGenOptLevel
Code generation optimization level.
Definition CodeGen.h:82
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference sizeof(SmallVector<T, 0>).
auto instructionsWithoutDebug(IterT It, IterT End, bool SkipPseudoOp=true)
Construct a range iterator which begins at It and moves forwards until End is reached, skipping any debug instructions.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
auto drop_end(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the last N elements excluded.
Definition STLExtras.h:323
unsigned getUndefRegState(bool B)
static MCRegister getXRegFromWReg(MCRegister Reg)
unsigned getDefRegState(bool B)
MCCFIInstruction createCFAOffset(const TargetRegisterInfo &MRI, unsigned Reg, const StackOffset &OffsetFromDefCFA, std::optional< int64_t > IncomingVGOffsetFromDefCFA)
unsigned getKillRegState(bool B)
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
static bool isUncondBranchOpcode(int Opc)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if, which is equivalent to: C.erase(remove_if(C, pred), C.end()).
Definition STLExtras.h:2120
constexpr bool isIntN(unsigned N, int64_t x)
Checks if a signed integer fits into the given (dynamic) bit width.
Definition MathExtras.h:248
bool rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, StackOffset &Offset, const AArch64InstrInfo *TII)
rewriteAArch64FrameIndex - Rewrite MI to access 'Offset' bytes from the FP.
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1897
static const MachineMemOperand::Flags MOSuppressPair
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
void appendLEB128(SmallVectorImpl< U > &Buffer, T Value)
Definition LEB128.h:236
std::pair< MachineOperand, DIExpression * > ParamLoadedValue
bool isNZCVTouchedInInstructionRange(const MachineInstr &DefMI, const MachineInstr &UseMI, const TargetRegisterInfo *TRI)
Return true if there is an instruction /after/ DefMI and before UseMI which either reads or clobbers NZCV.
auto seq(T Begin, T End)
Iterate over an integral type from Begin up to - but not including - End.
Definition Sequence.h:305
static const MachineMemOperand::Flags MOStridedAccess
void fullyRecomputeLiveIns(ArrayRef< MachineBasicBlock * > MBBs)
Convenience function for recomputing live-in's for a set of MBBs until the computation converges.
LLVM_ABI Printable printReg(Register Reg, const TargetRegisterInfo *TRI=nullptr, unsigned SubIdx=0, const MachineRegisterInfo *MRI=nullptr)
Prints virtual and physical registers with or without a TRI instance.
Used to describe addressing mode similar to ExtAddrMode in CodeGenPrepare.
LLVM_ABI static const MBBSectionID ColdSectionID
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
An individual sequence of instructions to be replaced with a call to an outlined function.
MachineFunction * getMF() const
The information necessary to create an outlined function for some class of candidate.