Line data Source code
1 : //===- AArch64InstrInfo.cpp - AArch64 Instruction Information -------------===//
2 : //
3 : // The LLVM Compiler Infrastructure
4 : //
5 : // This file is distributed under the University of Illinois Open Source
6 : // License. See LICENSE.TXT for details.
7 : //
8 : //===----------------------------------------------------------------------===//
9 : //
10 : // This file contains the AArch64 implementation of the TargetInstrInfo class.
11 : //
12 : //===----------------------------------------------------------------------===//
13 :
14 : #include "AArch64InstrInfo.h"
15 : #include "AArch64MachineFunctionInfo.h"
16 : #include "AArch64Subtarget.h"
17 : #include "MCTargetDesc/AArch64AddressingModes.h"
18 : #include "Utils/AArch64BaseInfo.h"
19 : #include "llvm/ADT/ArrayRef.h"
20 : #include "llvm/ADT/STLExtras.h"
21 : #include "llvm/ADT/SmallVector.h"
22 : #include "llvm/CodeGen/MachineBasicBlock.h"
23 : #include "llvm/CodeGen/MachineFrameInfo.h"
24 : #include "llvm/CodeGen/MachineFunction.h"
25 : #include "llvm/CodeGen/MachineInstr.h"
26 : #include "llvm/CodeGen/MachineInstrBuilder.h"
27 : #include "llvm/CodeGen/MachineMemOperand.h"
28 : #include "llvm/CodeGen/MachineOperand.h"
29 : #include "llvm/CodeGen/MachineRegisterInfo.h"
30 : #include "llvm/CodeGen/MachineModuleInfo.h"
31 : #include "llvm/CodeGen/StackMaps.h"
32 : #include "llvm/CodeGen/TargetRegisterInfo.h"
33 : #include "llvm/CodeGen/TargetSubtargetInfo.h"
34 : #include "llvm/IR/DebugLoc.h"
35 : #include "llvm/IR/GlobalValue.h"
36 : #include "llvm/MC/MCInst.h"
37 : #include "llvm/MC/MCInstrDesc.h"
38 : #include "llvm/Support/Casting.h"
39 : #include "llvm/Support/CodeGen.h"
40 : #include "llvm/Support/CommandLine.h"
41 : #include "llvm/Support/Compiler.h"
42 : #include "llvm/Support/ErrorHandling.h"
43 : #include "llvm/Support/MathExtras.h"
44 : #include "llvm/Target/TargetMachine.h"
45 : #include "llvm/Target/TargetOptions.h"
46 : #include <cassert>
47 : #include <cstdint>
48 : #include <iterator>
49 : #include <utility>
50 :
51 : using namespace llvm;
52 :
53 : #define GET_INSTRINFO_CTOR_DTOR
54 : #include "AArch64GenInstrInfo.inc"
55 :
56 : static cl::opt<unsigned> TBZDisplacementBits(
57 : "aarch64-tbz-offset-bits", cl::Hidden, cl::init(14),
58 : cl::desc("Restrict range of TB[N]Z instructions (DEBUG)"));
59 :
60 : static cl::opt<unsigned> CBZDisplacementBits(
61 : "aarch64-cbz-offset-bits", cl::Hidden, cl::init(19),
62 : cl::desc("Restrict range of CB[N]Z instructions (DEBUG)"));
63 :
64 : static cl::opt<unsigned>
65 : BCCDisplacementBits("aarch64-bcc-offset-bits", cl::Hidden, cl::init(19),
66 : cl::desc("Restrict range of Bcc instructions (DEBUG)"));
67 :
68 1573 : AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI)
69 : : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP),
70 1573 : RI(STI.getTargetTriple()), Subtarget(STI) {}
71 :
72 : /// getInstSizeInBytes - Return the number of bytes of code the specified
73 : /// instruction may occupy. This returns the maximum number of bytes.
74 91199 : unsigned AArch64InstrInfo::getInstSizeInBytes(const MachineInstr &MI) const {
75 91199 : const MachineBasicBlock &MBB = *MI.getParent();
76 91199 : const MachineFunction *MF = MBB.getParent();
77 91199 : const MCAsmInfo *MAI = MF->getTarget().getMCAsmInfo();
78 :
79 182398 : if (MI.getOpcode() == AArch64::INLINEASM)
80 216 : return getInlineAsmLength(MI.getOperand(0).getSymbolName(), *MAI);
81 :
82 : // FIXME: We currently only handle pseudoinstructions that don't get expanded
83 : // before the assembly printer.
84 : unsigned NumBytes = 0;
85 : const MCInstrDesc &Desc = MI.getDesc();
86 90983 : switch (Desc.getOpcode()) {
87 : default:
88 : // Anything not explicitly designated otherwise is a normal 4-byte insn.
89 : NumBytes = 4;
90 : break;
91 3311 : case TargetOpcode::DBG_VALUE:
92 : case TargetOpcode::EH_LABEL:
93 : case TargetOpcode::IMPLICIT_DEF:
94 : case TargetOpcode::KILL:
95 : NumBytes = 0;
96 3311 : break;
97 17 : case TargetOpcode::STACKMAP:
98 : // The upper bound for a stackmap intrinsic is the full length of its shadow
99 17 : NumBytes = StackMapOpers(&MI).getNumPatchBytes();
100 : assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
101 17 : break;
102 48 : case TargetOpcode::PATCHPOINT:
103 : // The size of the patchpoint intrinsic is the number of bytes requested
104 48 : NumBytes = PatchPointOpers(&MI).getNumPatchBytes();
105 : assert(NumBytes % 4 == 0 && "Invalid number of NOP bytes requested!");
106 48 : break;
107 30 : case AArch64::TLSDESC_CALLSEQ:
108 : // This gets lowered to an instruction sequence which takes 16 bytes
109 : NumBytes = 16;
110 30 : break;
111 : }
112 :
113 : return NumBytes;
114 : }
115 :
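 : // Helper for analyzeBranch below. The Cond vector uses a target-specific
 : // encoding: a plain Bcc stores just its condition-code operand, while
 : // CB(N)Z and TB(N)Z store a -1 marker, the branch opcode, the tested
 : // register and, for TB(N)Z, the bit number, so the branch can later be
 : // rebuilt or inverted from Cond alone.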
116 39354 : static void parseCondBranch(MachineInstr *LastInst, MachineBasicBlock *&Target,
117 : SmallVectorImpl<MachineOperand> &Cond) {
118 : // Block ends with fall-through condbranch.
119 78708 : switch (LastInst->getOpcode()) {
120 0 : default:
121 0 : llvm_unreachable("Unknown branch instruction?");
122 18644 : case AArch64::Bcc:
123 18644 : Target = LastInst->getOperand(1).getMBB();
124 18644 : Cond.push_back(LastInst->getOperand(0));
125 18644 : break;
126 12757 : case AArch64::CBZW:
127 : case AArch64::CBZX:
128 : case AArch64::CBNZW:
129 : case AArch64::CBNZX:
130 12757 : Target = LastInst->getOperand(1).getMBB();
131 25514 : Cond.push_back(MachineOperand::CreateImm(-1));
132 38271 : Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
133 12757 : Cond.push_back(LastInst->getOperand(0));
134 12757 : break;
135 7953 : case AArch64::TBZW:
136 : case AArch64::TBZX:
137 : case AArch64::TBNZW:
138 : case AArch64::TBNZX:
139 7953 : Target = LastInst->getOperand(2).getMBB();
140 15906 : Cond.push_back(MachineOperand::CreateImm(-1));
141 23859 : Cond.push_back(MachineOperand::CreateImm(LastInst->getOpcode()));
142 7953 : Cond.push_back(LastInst->getOperand(0));
143 15906 : Cond.push_back(LastInst->getOperand(1));
144 : }
145 39354 : }
146 :
147 : static unsigned getBranchDisplacementBits(unsigned Opc) {
148 1370 : switch (Opc) {
149 0 : default:
150 0 : llvm_unreachable("unexpected opcode!");
151 : case AArch64::B:
152 : return 64;
153 : case AArch64::TBNZW:
154 : case AArch64::TBZW:
155 : case AArch64::TBNZX:
156 : case AArch64::TBZX:
157 : return TBZDisplacementBits;
158 : case AArch64::CBNZW:
159 : case AArch64::CBZW:
160 : case AArch64::CBNZX:
161 : case AArch64::CBZX:
162 : return CBZDisplacementBits;
163 : case AArch64::Bcc:
164 : return BCCDisplacementBits;
165 : }
166 : }
167 :
168 1370 : bool AArch64InstrInfo::isBranchOffsetInRange(unsigned BranchOp,
169 : int64_t BrOffset) const {
170 : unsigned Bits = getBranchDisplacementBits(BranchOp);
171 : assert(Bits >= 3 && "max branch displacement must be enough to jump"
172 : "over conditional branch expansion");
173 1126 : return isIntN(Bits, BrOffset / 4);
174 : }
175 :
176 : MachineBasicBlock *
177 1448 : AArch64InstrInfo::getBranchDestBlock(const MachineInstr &MI) const {
178 2896 : switch (MI.getOpcode()) {
179 0 : default:
180 0 : llvm_unreachable("unexpected opcode!");
181 244 : case AArch64::B:
182 244 : return MI.getOperand(0).getMBB();
183 211 : case AArch64::TBZW:
184 : case AArch64::TBNZW:
185 : case AArch64::TBZX:
186 : case AArch64::TBNZX:
187 211 : return MI.getOperand(2).getMBB();
188 993 : case AArch64::CBZW:
189 : case AArch64::CBNZW:
190 : case AArch64::CBZX:
191 : case AArch64::CBNZX:
192 : case AArch64::Bcc:
193 993 : return MI.getOperand(1).getMBB();
194 : }
195 : }
196 :
197 : // Branch analysis.
198 405272 : bool AArch64InstrInfo::analyzeBranch(MachineBasicBlock &MBB,
199 : MachineBasicBlock *&TBB,
200 : MachineBasicBlock *&FBB,
201 : SmallVectorImpl<MachineOperand> &Cond,
202 : bool AllowModify) const {
203 : // If the block has no terminators, it just falls into the block after it.
204 405272 : MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
205 405272 : if (I == MBB.end())
206 : return false;
207 :
208 403815 : if (!isUnpredicatedTerminator(*I))
209 : return false;
210 :
211 : // Get the last instruction in the block.
212 : MachineInstr *LastInst = &*I;
213 :
214 : // If there is only one terminator instruction, process it.
215 379556 : unsigned LastOpc = LastInst->getOpcode();
216 743645 : if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
217 362063 : if (isUncondBranchOpcode(LastOpc)) {
218 7256 : TBB = LastInst->getOperand(0).getMBB();
219 7256 : return false;
220 : }
221 : if (isCondBranchOpcode(LastOpc)) {
222 : // Block ends with fall-through condbranch.
223 22181 : parseCondBranch(LastInst, TBB, Cond);
224 22181 : return false;
225 : }
226 : return true; // Can't handle indirect branch.
227 : }
228 :
229 : // Get the instruction before it if it is a terminator.
230 : MachineInstr *SecondLastInst = &*I;
231 17493 : unsigned SecondLastOpc = SecondLastInst->getOpcode();
232 :
233 : // If AllowModify is true and the block ends with two or more unconditional
234 : // branches, delete all but the first unconditional branch.
235 17493 : if (AllowModify && isUncondBranchOpcode(LastOpc)) {
236 2394 : while (isUncondBranchOpcode(SecondLastOpc)) {
237 2 : LastInst->eraseFromParent();
238 : LastInst = SecondLastInst;
239 2 : LastOpc = LastInst->getOpcode();
240 4 : if (I == MBB.begin() || !isUnpredicatedTerminator(*--I)) {
241 : // Return now; the only terminator is an unconditional branch.
242 2 : TBB = LastInst->getOperand(0).getMBB();
243 2 : return false;
244 : } else {
245 : SecondLastInst = &*I;
246 0 : SecondLastOpc = SecondLastInst->getOpcode();
247 : }
248 : }
249 : }
250 :
251 : // If there are three terminators, we don't know what sort of block this is.
252 33612 : if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(*--I))
253 : return true;
254 :
255 : // If the block ends with a B and a Bcc, handle it.
256 17193 : if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
257 17173 : parseCondBranch(SecondLastInst, TBB, Cond);
258 17173 : FBB = LastInst->getOperand(0).getMBB();
259 17173 : return false;
260 : }
261 :
262 : // If the block ends with two unconditional branches, handle it. The second
263 : // one is not executed, so remove it.
264 132 : if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
265 0 : TBB = SecondLastInst->getOperand(0).getMBB();
266 0 : I = LastInst;
267 0 : if (AllowModify)
268 0 : I->eraseFromParent();
269 0 : return false;
270 : }
271 :
272 : // ...likewise if it ends with an indirect branch followed by an unconditional
273 : // branch.
274 132 : if (isIndirectBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) {
275 0 : I = LastInst;
276 0 : if (AllowModify)
277 0 : I->eraseFromParent();
278 0 : return true;
279 : }
280 :
281 : // Otherwise, can't handle this.
282 : return true;
283 : }
284 :
285 2074 : bool AArch64InstrInfo::reverseBranchCondition(
286 : SmallVectorImpl<MachineOperand> &Cond) const {
287 2074 : if (Cond[0].getImm() != -1) {
288 : // Regular Bcc
289 1351 : AArch64CC::CondCode CC = (AArch64CC::CondCode)(int)Cond[0].getImm();
290 1351 : Cond[0].setImm(AArch64CC::getInvertedCondCode(CC));
291 : } else {
292 : // Folded compare-and-branch
293 723 : switch (Cond[1].getImm()) {
294 0 : default:
295 0 : llvm_unreachable("Unknown conditional branch!");
296 181 : case AArch64::CBZW:
297 : Cond[1].setImm(AArch64::CBNZW);
298 : break;
299 174 : case AArch64::CBNZW:
300 : Cond[1].setImm(AArch64::CBZW);
301 : break;
302 52 : case AArch64::CBZX:
303 : Cond[1].setImm(AArch64::CBNZX);
304 : break;
305 51 : case AArch64::CBNZX:
306 : Cond[1].setImm(AArch64::CBZX);
307 : break;
308 112 : case AArch64::TBZW:
309 : Cond[1].setImm(AArch64::TBNZW);
310 : break;
311 111 : case AArch64::TBNZW:
312 : Cond[1].setImm(AArch64::TBZW);
313 : break;
314 24 : case AArch64::TBZX:
315 : Cond[1].setImm(AArch64::TBNZX);
316 : break;
317 18 : case AArch64::TBNZX:
318 : Cond[1].setImm(AArch64::TBZX);
319 : break;
320 : }
321 : }
322 :
323 2074 : return false;
324 : }
325 :
326 3848 : unsigned AArch64InstrInfo::removeBranch(MachineBasicBlock &MBB,
327 : int *BytesRemoved) const {
328 3848 : MachineBasicBlock::iterator I = MBB.getLastNonDebugInstr();
329 3848 : if (I == MBB.end())
330 : return 0;
331 :
332 7696 : if (!isUncondBranchOpcode(I->getOpcode()) &&
333 : !isCondBranchOpcode(I->getOpcode()))
334 : return 0;
335 :
336 : // Remove the branch.
337 3637 : I->eraseFromParent();
338 :
339 3637 : I = MBB.end();
340 :
341 3637 : if (I == MBB.begin()) {
342 395 : if (BytesRemoved)
343 2 : *BytesRemoved = 4;
344 395 : return 1;
345 : }
346 : --I;
347 3242 : if (!isCondBranchOpcode(I->getOpcode())) {
348 2132 : if (BytesRemoved)
349 6 : *BytesRemoved = 4;
350 2132 : return 1;
351 : }
352 :
353 : // Remove the branch.
354 1110 : I->eraseFromParent();
355 1110 : if (BytesRemoved)
356 2 : *BytesRemoved = 8;
357 :
358 : return 2;
359 : }
360 :
361 2702 : void AArch64InstrInfo::instantiateCondBranch(
362 : MachineBasicBlock &MBB, const DebugLoc &DL, MachineBasicBlock *TBB,
363 : ArrayRef<MachineOperand> Cond) const {
364 2702 : if (Cond[0].getImm() != -1) {
365 : // Regular Bcc
366 1662 : BuildMI(&MBB, DL, get(AArch64::Bcc)).addImm(Cond[0].getImm()).addMBB(TBB);
367 : } else {
368 : // Folded compare-and-branch
369 : // Note that we use addOperand instead of addReg to keep the flags.
370 : const MachineInstrBuilder MIB =
371 1040 : BuildMI(&MBB, DL, get(Cond[1].getImm())).add(Cond[2]);
372 1040 : if (Cond.size() > 3)
373 357 : MIB.addImm(Cond[3].getImm());
374 : MIB.addMBB(TBB);
375 : }
376 2702 : }
377 :
378 3647 : unsigned AArch64InstrInfo::insertBranch(
379 : MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
380 : ArrayRef<MachineOperand> Cond, const DebugLoc &DL, int *BytesAdded) const {
381 : // Shouldn't be a fall through.
382 : assert(TBB && "insertBranch must not be told to insert a fallthrough");
383 :
384 3647 : if (!FBB) {
385 3578 : if (Cond.empty()) // Unconditional branch?
386 945 : BuildMI(&MBB, DL, get(AArch64::B)).addMBB(TBB);
387 : else
388 2633 : instantiateCondBranch(MBB, DL, TBB, Cond);
389 :
390 3578 : if (BytesAdded)
391 2 : *BytesAdded = 4;
392 :
393 3578 : return 1;
394 : }
395 :
396 : // Two-way conditional branch.
397 69 : instantiateCondBranch(MBB, DL, TBB, Cond);
398 69 : BuildMI(&MBB, DL, get(AArch64::B)).addMBB(FBB);
399 :
400 69 : if (BytesAdded)
401 10 : *BytesAdded = 8;
402 :
403 : return 2;
404 : }
405 :
406 : // Find the original register that VReg is copied from.
407 240 : static unsigned removeCopies(const MachineRegisterInfo &MRI, unsigned VReg) {
408 486 : while (TargetRegisterInfo::isVirtualRegister(VReg)) {
409 397 : const MachineInstr *DefMI = MRI.getVRegDef(VReg);
410 : if (!DefMI->isFullCopy())
411 151 : return VReg;
412 246 : VReg = DefMI->getOperand(1).getReg();
413 : }
414 : return VReg;
415 : }
416 :
417 : // Determine if VReg is defined by an instruction that can be folded into a
418 : // csel instruction. If so, return the folded opcode, and the replacement
419 : // register.
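 : // For example (hypothetical virtual registers): if the true operand of a
 : // select is defined by '%t = ADDWri %x, 1, 0' (i.e. x + 1), insertSelect()
 : // below can invert the condition and emit '%d = CSINCWr %f, %x, !cc'
 : // instead of materializing the add and a separate CSELWr.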
420 224 : static unsigned canFoldIntoCSel(const MachineRegisterInfo &MRI, unsigned VReg,
421 : unsigned *NewVReg = nullptr) {
422 224 : VReg = removeCopies(MRI, VReg);
423 224 : if (!TargetRegisterInfo::isVirtualRegister(VReg))
424 : return 0;
425 :
426 151 : bool Is64Bit = AArch64::GPR64allRegClass.hasSubClassEq(MRI.getRegClass(VReg));
427 151 : const MachineInstr *DefMI = MRI.getVRegDef(VReg);
428 : unsigned Opc = 0;
429 : unsigned SrcOpNum = 0;
430 302 : switch (DefMI->getOpcode()) {
431 0 : case AArch64::ADDSXri:
432 : case AArch64::ADDSWri:
433 : // if NZCV is used, do not fold.
434 0 : if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
435 : return 0;
436 : // fall-through to ADDXri and ADDWri.
437 : LLVM_FALLTHROUGH;
438 : case AArch64::ADDXri:
439 : case AArch64::ADDWri:
440 : // add x, 1 -> csinc.
441 186 : if (!DefMI->getOperand(2).isImm() || DefMI->getOperand(2).getImm() != 1 ||
442 10 : DefMI->getOperand(3).getImm() != 0)
443 : return 0;
444 : SrcOpNum = 1;
445 10 : Opc = Is64Bit ? AArch64::CSINCXr : AArch64::CSINCWr;
446 : break;
447 :
448 8 : case AArch64::ORNXrr:
449 : case AArch64::ORNWrr: {
450 : // not x -> csinv, represented as orn dst, xzr, src.
451 8 : unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
452 8 : if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
453 : return 0;
454 : SrcOpNum = 2;
455 8 : Opc = Is64Bit ? AArch64::CSINVXr : AArch64::CSINVWr;
456 : break;
457 : }
458 :
459 8 : case AArch64::SUBSXrr:
460 : case AArch64::SUBSWrr:
461 : // if NZCV is used, do not fold.
462 8 : if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) == -1)
463 : return 0;
464 : // fall-through to SUBXrr and SUBWrr.
465 : LLVM_FALLTHROUGH;
466 : case AArch64::SUBXrr:
467 : case AArch64::SUBWrr: {
468 : // neg x -> csneg, represented as sub dst, xzr, src.
469 8 : unsigned ZReg = removeCopies(MRI, DefMI->getOperand(1).getReg());
470 8 : if (ZReg != AArch64::XZR && ZReg != AArch64::WZR)
471 : return 0;
472 : SrcOpNum = 2;
473 8 : Opc = Is64Bit ? AArch64::CSNEGXr : AArch64::CSNEGWr;
474 : break;
475 : }
476 : default:
477 : return 0;
478 : }
479 : assert(Opc && SrcOpNum && "Missing parameters");
480 :
481 26 : if (NewVReg)
482 26 : *NewVReg = DefMI->getOperand(SrcOpNum).getReg();
483 : return Opc;
484 : }
485 :
486 97 : bool AArch64InstrInfo::canInsertSelect(const MachineBasicBlock &MBB,
487 : ArrayRef<MachineOperand> Cond,
488 : unsigned TrueReg, unsigned FalseReg,
489 : int &CondCycles, int &TrueCycles,
490 : int &FalseCycles) const {
491 : // Check register classes.
492 97 : const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
493 : const TargetRegisterClass *RC =
494 194 : RI.getCommonSubClass(MRI.getRegClass(TrueReg), MRI.getRegClass(FalseReg));
495 97 : if (!RC)
496 : return false;
497 :
498 : // Expanding cbz/tbz requires an extra cycle of latency on the condition.
499 97 : unsigned ExtraCondLat = Cond.size() != 1;
500 :
501 : // GPRs are handled by csel.
502 : // FIXME: Fold in x+1, -x, and ~x when applicable.
503 194 : if (AArch64::GPR64allRegClass.hasSubClassEq(RC) ||
504 41 : AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
505 : // Single-cycle csel, csinc, csinv, and csneg.
506 94 : CondCycles = 1 + ExtraCondLat;
507 94 : TrueCycles = FalseCycles = 1;
508 94 : if (canFoldIntoCSel(MRI, TrueReg))
509 6 : TrueCycles = 0;
510 88 : else if (canFoldIntoCSel(MRI, FalseReg))
511 7 : FalseCycles = 0;
512 94 : return true;
513 : }
514 :
515 : // Scalar floating point is handled by fcsel.
516 : // FIXME: Form fabs, fmin, and fmax when applicable.
517 6 : if (AArch64::FPR64RegClass.hasSubClassEq(RC) ||
518 3 : AArch64::FPR32RegClass.hasSubClassEq(RC)) {
519 0 : CondCycles = 5 + ExtraCondLat;
520 0 : TrueCycles = FalseCycles = 2;
521 0 : return true;
522 : }
523 :
524 : // Can't do vectors.
525 : return false;
526 : }
527 :
528 24 : void AArch64InstrInfo::insertSelect(MachineBasicBlock &MBB,
529 : MachineBasicBlock::iterator I,
530 : const DebugLoc &DL, unsigned DstReg,
531 : ArrayRef<MachineOperand> Cond,
532 : unsigned TrueReg, unsigned FalseReg) const {
533 24 : MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
534 :
535 : // Parse the condition code, see parseCondBranch() above.
536 : AArch64CC::CondCode CC;
537 24 : switch (Cond.size()) {
538 0 : default:
539 0 : llvm_unreachable("Unknown condition opcode in Cond");
540 15 : case 1: // b.cc
541 15 : CC = AArch64CC::CondCode(Cond[0].getImm());
542 15 : break;
543 5 : case 3: { // cbz/cbnz
544 : // We must insert a compare against 0.
545 : bool Is64Bit;
546 5 : switch (Cond[1].getImm()) {
547 0 : default:
548 0 : llvm_unreachable("Unknown branch opcode in Cond");
549 : case AArch64::CBZW:
550 : Is64Bit = false;
551 : CC = AArch64CC::EQ;
552 : break;
553 : case AArch64::CBZX:
554 : Is64Bit = true;
555 : CC = AArch64CC::EQ;
556 : break;
557 : case AArch64::CBNZW:
558 : Is64Bit = false;
559 : CC = AArch64CC::NE;
560 : break;
561 : case AArch64::CBNZX:
562 : Is64Bit = true;
563 : CC = AArch64CC::NE;
564 : break;
565 : }
566 5 : unsigned SrcReg = Cond[2].getReg();
567 5 : if (Is64Bit) {
568 : // cmp reg, #0 is actually subs xzr, reg, #0.
569 2 : MRI.constrainRegClass(SrcReg, &AArch64::GPR64spRegClass);
570 6 : BuildMI(MBB, I, DL, get(AArch64::SUBSXri), AArch64::XZR)
571 2 : .addReg(SrcReg)
572 : .addImm(0)
573 : .addImm(0);
574 : } else {
575 3 : MRI.constrainRegClass(SrcReg, &AArch64::GPR32spRegClass);
576 9 : BuildMI(MBB, I, DL, get(AArch64::SUBSWri), AArch64::WZR)
577 3 : .addReg(SrcReg)
578 : .addImm(0)
579 : .addImm(0);
580 : }
581 : break;
582 : }
583 4 : case 4: { // tbz/tbnz
584 : // We must insert a tst instruction.
585 4 : switch (Cond[1].getImm()) {
586 0 : default:
587 0 : llvm_unreachable("Unknown branch opcode in Cond");
588 : case AArch64::TBZW:
589 : case AArch64::TBZX:
590 : CC = AArch64CC::EQ;
591 : break;
592 2 : case AArch64::TBNZW:
593 : case AArch64::TBNZX:
594 : CC = AArch64CC::NE;
595 2 : break;
596 : }
597 : // cmp reg, #foo is actually ands xzr, reg, #1<<foo.
598 4 : if (Cond[1].getImm() == AArch64::TBZW || Cond[1].getImm() == AArch64::TBNZW)
599 6 : BuildMI(MBB, I, DL, get(AArch64::ANDSWri), AArch64::WZR)
600 2 : .addReg(Cond[2].getReg())
601 : .addImm(
602 2 : AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 32));
603 : else
604 6 : BuildMI(MBB, I, DL, get(AArch64::ANDSXri), AArch64::XZR)
605 2 : .addReg(Cond[2].getReg())
606 : .addImm(
607 2 : AArch64_AM::encodeLogicalImmediate(1ull << Cond[3].getImm(), 64));
608 : break;
609 : }
610 : }
611 :
612 : unsigned Opc = 0;
613 : const TargetRegisterClass *RC = nullptr;
614 : bool TryFold = false;
615 24 : if (MRI.constrainRegClass(DstReg, &AArch64::GPR64RegClass)) {
616 : RC = &AArch64::GPR64RegClass;
617 : Opc = AArch64::CSELXr;
618 : TryFold = true;
619 14 : } else if (MRI.constrainRegClass(DstReg, &AArch64::GPR32RegClass)) {
620 : RC = &AArch64::GPR32RegClass;
621 : Opc = AArch64::CSELWr;
622 : TryFold = true;
623 0 : } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR64RegClass)) {
624 : RC = &AArch64::FPR64RegClass;
625 : Opc = AArch64::FCSELDrrr;
626 0 : } else if (MRI.constrainRegClass(DstReg, &AArch64::FPR32RegClass)) {
627 : RC = &AArch64::FPR32RegClass;
628 : Opc = AArch64::FCSELSrrr;
629 : }
630 : assert(RC && "Unsupported regclass");
631 :
632 : // Try folding simple instructions into the csel.
633 24 : if (TryFold) {
634 24 : unsigned NewVReg = 0;
635 24 : unsigned FoldedOpc = canFoldIntoCSel(MRI, TrueReg, &NewVReg);
636 24 : if (FoldedOpc) {
637 : // The folded opcodes csinc, csinv and csneg apply the operation to
638 : // FalseReg, so we need to invert the condition.
639 : CC = AArch64CC::getInvertedCondCode(CC);
640 : TrueReg = FalseReg;
641 : } else
642 18 : FoldedOpc = canFoldIntoCSel(MRI, FalseReg, &NewVReg);
643 :
644 : // Fold the operation. Leave any dead instructions for DCE to clean up.
645 24 : if (FoldedOpc) {
646 13 : FalseReg = NewVReg;
647 : Opc = FoldedOpc;
648 : // This extends the live range of NewVReg.
649 13 : MRI.clearKillFlags(NewVReg);
650 : }
651 : }
652 :
653 : // Pull all virtual registers into the appropriate class.
654 24 : MRI.constrainRegClass(TrueReg, RC);
655 24 : MRI.constrainRegClass(FalseReg, RC);
656 :
657 : // Insert the csel.
658 48 : BuildMI(MBB, I, DL, get(Opc), DstReg)
659 24 : .addReg(TrueReg)
660 24 : .addReg(FalseReg)
661 24 : .addImm(CC);
662 24 : }
663 :
664 : /// Returns true if a MOVi32imm or MOVi64imm can be expanded to an ORRxx.
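 : /// For example, 0x00ff00ff is a valid logical immediate, so a hypothetical
 : /// 'MOVi32imm 0x00ff00ff' can be materialized as a single
 : /// 'orr wD, wzr, #0x00ff00ff' instead of a MOVZ/MOVK pair.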
665 : static bool canBeExpandedToORR(const MachineInstr &MI, unsigned BitSize) {
666 5 : uint64_t Imm = MI.getOperand(1).getImm();
667 5 : uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
668 : uint64_t Encoding;
669 5 : return AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding);
670 : }
671 :
672 : // FIXME: this implementation should be micro-architecture dependent, so a
673 : // micro-architecture target hook should be introduced here in future.
674 13479 : bool AArch64InstrInfo::isAsCheapAsAMove(const MachineInstr &MI) const {
675 13479 : if (!Subtarget.hasCustomCheapAsMoveHandling())
676 12265 : return MI.isAsCheapAsAMove();
677 :
678 1214 : const unsigned Opcode = MI.getOpcode();
679 :
680 : // Firstly, check cases gated by features.
681 :
682 1214 : if (Subtarget.hasZeroCycleZeroingFP()) {
683 2108 : if (Opcode == AArch64::FMOVH0 ||
684 1054 : Opcode == AArch64::FMOVS0 ||
685 : Opcode == AArch64::FMOVD0)
686 : return true;
687 : }
688 :
689 1190 : if (Subtarget.hasZeroCycleZeroingGP()) {
690 373 : if (Opcode == TargetOpcode::COPY &&
691 0 : (MI.getOperand(1).getReg() == AArch64::WZR ||
692 : MI.getOperand(1).getReg() == AArch64::XZR))
693 : return true;
694 : }
695 :
696 : // Secondly, check cases specific to sub-targets.
697 :
698 1190 : if (Subtarget.hasExynosCheapAsMoveHandling()) {
699 657 : if (isExynosResetFast(MI) || isExynosShiftLeftFast(MI))
700 37 : return true;
701 : else
702 620 : return MI.isAsCheapAsAMove();
703 : }
704 :
705 : // Finally, check generic cases.
706 :
707 533 : switch (Opcode) {
708 : default:
709 : return false;
710 :
711 : // add/sub on register without shift
712 18 : case AArch64::ADDWri:
713 : case AArch64::ADDXri:
714 : case AArch64::SUBWri:
715 : case AArch64::SUBXri:
716 18 : return (MI.getOperand(3).getImm() == 0);
717 :
718 : // logical ops on immediate
719 0 : case AArch64::ANDWri:
720 : case AArch64::ANDXri:
721 : case AArch64::EORWri:
722 : case AArch64::EORXri:
723 : case AArch64::ORRWri:
724 : case AArch64::ORRXri:
725 0 : return true;
726 :
727 : // logical ops on register without shift
728 0 : case AArch64::ANDWrr:
729 : case AArch64::ANDXrr:
730 : case AArch64::BICWrr:
731 : case AArch64::BICXrr:
732 : case AArch64::EONWrr:
733 : case AArch64::EONXrr:
734 : case AArch64::EORWrr:
735 : case AArch64::EORXrr:
736 : case AArch64::ORNWrr:
737 : case AArch64::ORNXrr:
738 : case AArch64::ORRWrr:
739 : case AArch64::ORRXrr:
740 0 : return true;
741 :
742 : // If MOVi32imm or MOVi64imm can be expanded into ORRWri or
743 : // ORRXri, it is as cheap as MOV
744 : case AArch64::MOVi32imm:
745 5 : return canBeExpandedToORR(MI, 32);
746 : case AArch64::MOVi64imm:
747 0 : return canBeExpandedToORR(MI, 64);
748 : }
749 :
750 : llvm_unreachable("Unknown opcode to check as cheap as a move!");
751 : }
752 :
753 1259 : bool AArch64InstrInfo::isExynosResetFast(const MachineInstr &MI) const {
754 : unsigned Reg, Imm, Shift;
755 :
756 2518 : switch (MI.getOpcode()) {
757 : default:
758 : return false;
759 :
760 : // MOV Rd, SP
761 322 : case AArch64::ADDWri:
762 : case AArch64::ADDXri:
763 644 : if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
764 : return false;
765 :
766 252 : Reg = MI.getOperand(1).getReg();
767 252 : Imm = MI.getOperand(2).getImm();
768 252 : return ((Reg == AArch64::WSP || Reg == AArch64::SP) && Imm == 0);
769 :
770 : // Literal
771 3 : case AArch64::ADR:
772 : case AArch64::ADRP:
773 3 : return true;
774 :
775 : // MOVI Vd, #0
776 47 : case AArch64::MOVID:
777 : case AArch64::MOVIv8b_ns:
778 : case AArch64::MOVIv2d_ns:
779 : case AArch64::MOVIv16b_ns:
780 47 : Imm = MI.getOperand(1).getImm();
781 47 : return (Imm == 0);
782 :
783 : // MOVI Vd, #0
784 0 : case AArch64::MOVIv2i32:
785 : case AArch64::MOVIv4i16:
786 : case AArch64::MOVIv4i32:
787 : case AArch64::MOVIv8i16:
788 0 : Imm = MI.getOperand(1).getImm();
789 0 : Shift = MI.getOperand(2).getImm();
790 0 : return (Imm == 0 && Shift == 0);
791 :
792 : // MOV Rd, Imm
793 0 : case AArch64::MOVNWi:
794 : case AArch64::MOVNXi:
795 :
796 : // MOV Rd, Imm
797 : case AArch64::MOVZWi:
798 : case AArch64::MOVZXi:
799 0 : return true;
800 :
801 : // MOV Rd, Imm
802 7 : case AArch64::ORRWri:
803 : case AArch64::ORRXri:
804 14 : if (!MI.getOperand(1).isReg())
805 : return false;
806 :
807 7 : Reg = MI.getOperand(1).getReg();
808 7 : Imm = MI.getOperand(2).getImm();
809 7 : return ((Reg == AArch64::WZR || Reg == AArch64::XZR) && Imm == 0);
810 :
811 : // MOV Rd, Rm
812 72 : case AArch64::ORRWrs:
813 : case AArch64::ORRXrs:
814 144 : if (!MI.getOperand(1).isReg())
815 : return false;
816 :
817 72 : Reg = MI.getOperand(1).getReg();
818 72 : Imm = MI.getOperand(3).getImm();
819 : Shift = AArch64_AM::getShiftValue(Imm);
820 72 : return ((Reg == AArch64::WZR || Reg == AArch64::XZR) && Shift == 0);
821 : }
822 : }
823 :
824 1642 : bool AArch64InstrInfo::isExynosShiftLeftFast(const MachineInstr &MI) const {
825 : unsigned Imm, Shift;
826 : AArch64_AM::ShiftExtendType Ext;
827 :
828 3284 : switch (MI.getOpcode()) {
829 : default:
830 : return false;
831 :
832 : // WriteI
833 358 : case AArch64::ADDSWri:
834 : case AArch64::ADDSXri:
835 : case AArch64::ADDWri:
836 : case AArch64::ADDXri:
837 : case AArch64::SUBSWri:
838 : case AArch64::SUBSXri:
839 : case AArch64::SUBWri:
840 : case AArch64::SUBXri:
841 358 : return true;
842 :
843 : // WriteISReg
844 422 : case AArch64::ADDSWrs:
845 : case AArch64::ADDSXrs:
846 : case AArch64::ADDWrs:
847 : case AArch64::ADDXrs:
848 : case AArch64::ANDSWrs:
849 : case AArch64::ANDSXrs:
850 : case AArch64::ANDWrs:
851 : case AArch64::ANDXrs:
852 : case AArch64::BICSWrs:
853 : case AArch64::BICSXrs:
854 : case AArch64::BICWrs:
855 : case AArch64::BICXrs:
856 : case AArch64::EONWrs:
857 : case AArch64::EONXrs:
858 : case AArch64::EORWrs:
859 : case AArch64::EORXrs:
860 : case AArch64::ORNWrs:
861 : case AArch64::ORNXrs:
862 : case AArch64::ORRWrs:
863 : case AArch64::ORRXrs:
864 : case AArch64::SUBSWrs:
865 : case AArch64::SUBSXrs:
866 : case AArch64::SUBWrs:
867 : case AArch64::SUBXrs:
868 422 : Imm = MI.getOperand(3).getImm();
869 : Shift = AArch64_AM::getShiftValue(Imm);
870 : Ext = AArch64_AM::getShiftType(Imm);
871 422 : return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::LSL));
872 :
873 : // WriteIEReg
874 50 : case AArch64::ADDSWrx:
875 : case AArch64::ADDSXrx:
876 : case AArch64::ADDSXrx64:
877 : case AArch64::ADDWrx:
878 : case AArch64::ADDXrx:
879 : case AArch64::ADDXrx64:
880 : case AArch64::SUBSWrx:
881 : case AArch64::SUBSXrx:
882 : case AArch64::SUBSXrx64:
883 : case AArch64::SUBWrx:
884 : case AArch64::SUBXrx:
885 : case AArch64::SUBXrx64:
886 50 : Imm = MI.getOperand(3).getImm();
887 : Shift = AArch64_AM::getArithShiftValue(Imm);
888 : Ext = AArch64_AM::getArithExtendType(Imm);
889 50 : return (Shift == 0 || (Shift <= 3 && Ext == AArch64_AM::UXTX));
890 :
891 185 : case AArch64::PRFMroW:
892 : case AArch64::PRFMroX:
893 :
894 : // WriteLDIdx
895 : case AArch64::LDRBBroW:
896 : case AArch64::LDRBBroX:
897 : case AArch64::LDRHHroW:
898 : case AArch64::LDRHHroX:
899 : case AArch64::LDRSBWroW:
900 : case AArch64::LDRSBWroX:
901 : case AArch64::LDRSBXroW:
902 : case AArch64::LDRSBXroX:
903 : case AArch64::LDRSHWroW:
904 : case AArch64::LDRSHWroX:
905 : case AArch64::LDRSHXroW:
906 : case AArch64::LDRSHXroX:
907 : case AArch64::LDRSWroW:
908 : case AArch64::LDRSWroX:
909 : case AArch64::LDRWroW:
910 : case AArch64::LDRWroX:
911 : case AArch64::LDRXroW:
912 : case AArch64::LDRXroX:
913 :
914 : case AArch64::LDRBroW:
915 : case AArch64::LDRBroX:
916 : case AArch64::LDRDroW:
917 : case AArch64::LDRDroX:
918 : case AArch64::LDRHroW:
919 : case AArch64::LDRHroX:
920 : case AArch64::LDRSroW:
921 : case AArch64::LDRSroX:
922 :
923 : // WriteSTIdx
924 : case AArch64::STRBBroW:
925 : case AArch64::STRBBroX:
926 : case AArch64::STRHHroW:
927 : case AArch64::STRHHroX:
928 : case AArch64::STRWroW:
929 : case AArch64::STRWroX:
930 : case AArch64::STRXroW:
931 : case AArch64::STRXroX:
932 :
933 : case AArch64::STRBroW:
934 : case AArch64::STRBroX:
935 : case AArch64::STRDroW:
936 : case AArch64::STRDroX:
937 : case AArch64::STRHroW:
938 : case AArch64::STRHroX:
939 : case AArch64::STRSroW:
940 : case AArch64::STRSroX:
941 185 : Imm = MI.getOperand(3).getImm();
942 : Ext = AArch64_AM::getMemExtendType(Imm);
943 185 : return (Ext == AArch64_AM::SXTX || Ext == AArch64_AM::UXTX);
944 : }
945 : }
946 :
947 112 : bool AArch64InstrInfo::isFalkorShiftExtFast(const MachineInstr &MI) const {
948 224 : switch (MI.getOpcode()) {
949 : default:
950 : return false;
951 :
952 6 : case AArch64::ADDWrs:
953 : case AArch64::ADDXrs:
954 : case AArch64::ADDSWrs:
955 : case AArch64::ADDSXrs: {
956 6 : unsigned Imm = MI.getOperand(3).getImm();
957 : unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
958 6 : if (ShiftVal == 0)
959 : return true;
960 0 : return AArch64_AM::getShiftType(Imm) == AArch64_AM::LSL && ShiftVal <= 5;
961 : }
962 :
963 27 : case AArch64::ADDWrx:
964 : case AArch64::ADDXrx:
965 : case AArch64::ADDXrx64:
966 : case AArch64::ADDSWrx:
967 : case AArch64::ADDSXrx:
968 : case AArch64::ADDSXrx64: {
969 27 : unsigned Imm = MI.getOperand(3).getImm();
970 27 : switch (AArch64_AM::getArithExtendType(Imm)) {
971 : default:
972 : return false;
973 : case AArch64_AM::UXTB:
974 : case AArch64_AM::UXTH:
975 : case AArch64_AM::UXTW:
976 : case AArch64_AM::UXTX:
977 27 : return AArch64_AM::getArithShiftValue(Imm) <= 4;
978 : }
979 : }
980 :
981 10 : case AArch64::SUBWrs:
982 : case AArch64::SUBSWrs: {
983 10 : unsigned Imm = MI.getOperand(3).getImm();
984 : unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
985 10 : return ShiftVal == 0 ||
986 0 : (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 31);
987 : }
988 :
989 0 : case AArch64::SUBXrs:
990 : case AArch64::SUBSXrs: {
991 0 : unsigned Imm = MI.getOperand(3).getImm();
992 : unsigned ShiftVal = AArch64_AM::getShiftValue(Imm);
993 0 : return ShiftVal == 0 ||
994 0 : (AArch64_AM::getShiftType(Imm) == AArch64_AM::ASR && ShiftVal == 63);
995 : }
996 :
997 20 : case AArch64::SUBWrx:
998 : case AArch64::SUBXrx:
999 : case AArch64::SUBXrx64:
1000 : case AArch64::SUBSWrx:
1001 : case AArch64::SUBSXrx:
1002 : case AArch64::SUBSXrx64: {
1003 20 : unsigned Imm = MI.getOperand(3).getImm();
1004 20 : switch (AArch64_AM::getArithExtendType(Imm)) {
1005 : default:
1006 : return false;
1007 : case AArch64_AM::UXTB:
1008 : case AArch64_AM::UXTH:
1009 : case AArch64_AM::UXTW:
1010 : case AArch64_AM::UXTX:
1011 20 : return AArch64_AM::getArithShiftValue(Imm) == 0;
1012 : }
1013 : }
1014 :
1015 49 : case AArch64::LDRBBroW:
1016 : case AArch64::LDRBBroX:
1017 : case AArch64::LDRBroW:
1018 : case AArch64::LDRBroX:
1019 : case AArch64::LDRDroW:
1020 : case AArch64::LDRDroX:
1021 : case AArch64::LDRHHroW:
1022 : case AArch64::LDRHHroX:
1023 : case AArch64::LDRHroW:
1024 : case AArch64::LDRHroX:
1025 : case AArch64::LDRQroW:
1026 : case AArch64::LDRQroX:
1027 : case AArch64::LDRSBWroW:
1028 : case AArch64::LDRSBWroX:
1029 : case AArch64::LDRSBXroW:
1030 : case AArch64::LDRSBXroX:
1031 : case AArch64::LDRSHWroW:
1032 : case AArch64::LDRSHWroX:
1033 : case AArch64::LDRSHXroW:
1034 : case AArch64::LDRSHXroX:
1035 : case AArch64::LDRSWroW:
1036 : case AArch64::LDRSWroX:
1037 : case AArch64::LDRSroW:
1038 : case AArch64::LDRSroX:
1039 : case AArch64::LDRWroW:
1040 : case AArch64::LDRWroX:
1041 : case AArch64::LDRXroW:
1042 : case AArch64::LDRXroX:
1043 : case AArch64::PRFMroW:
1044 : case AArch64::PRFMroX:
1045 : case AArch64::STRBBroW:
1046 : case AArch64::STRBBroX:
1047 : case AArch64::STRBroW:
1048 : case AArch64::STRBroX:
1049 : case AArch64::STRDroW:
1050 : case AArch64::STRDroX:
1051 : case AArch64::STRHHroW:
1052 : case AArch64::STRHHroX:
1053 : case AArch64::STRHroW:
1054 : case AArch64::STRHroX:
1055 : case AArch64::STRQroW:
1056 : case AArch64::STRQroX:
1057 : case AArch64::STRSroW:
1058 : case AArch64::STRSroX:
1059 : case AArch64::STRWroW:
1060 : case AArch64::STRWroX:
1061 : case AArch64::STRXroW:
1062 : case AArch64::STRXroX: {
1063 49 : unsigned IsSigned = MI.getOperand(3).getImm();
1064 49 : return !IsSigned;
1065 : }
1066 : }
1067 : }
1068 :
1069 115585 : bool AArch64InstrInfo::isCoalescableExtInstr(const MachineInstr &MI,
1070 : unsigned &SrcReg, unsigned &DstReg,
1071 : unsigned &SubIdx) const {
1072 231170 : switch (MI.getOpcode()) {
1073 : default:
1074 : return false;
1075 641 : case AArch64::SBFMXri: // aka sxtw
1076 : case AArch64::UBFMXri: // aka uxtw
1077 : // Check for the 32 -> 64 bit extension case, these instructions can do
1078 : // much more.
1079 641 : if (MI.getOperand(2).getImm() != 0 || MI.getOperand(3).getImm() != 31)
1080 : return false;
1081 : // This is a signed or unsigned 32 -> 64 bit extension.
1082 37 : SrcReg = MI.getOperand(1).getReg();
1083 37 : DstReg = MI.getOperand(0).getReg();
1084 37 : SubIdx = AArch64::sub_32;
1085 37 : return true;
1086 : }
1087 : }
1088 :
1089 9540 : bool AArch64InstrInfo::areMemAccessesTriviallyDisjoint(
1090 : MachineInstr &MIa, MachineInstr &MIb, AliasAnalysis *AA) const {
1091 : const TargetRegisterInfo *TRI = &getRegisterInfo();
1092 9540 : unsigned BaseRegA = 0, BaseRegB = 0;
1093 9540 : int64_t OffsetA = 0, OffsetB = 0;
1094 9540 : unsigned WidthA = 0, WidthB = 0;
1095 :
1096 : assert(MIa.mayLoadOrStore() && "MIa must be a load or store.");
1097 : assert(MIb.mayLoadOrStore() && "MIb must be a load or store.");
1098 :
1099 28614 : if (MIa.hasUnmodeledSideEffects() || MIb.hasUnmodeledSideEffects() ||
1100 28608 : MIa.hasOrderedMemoryRef() || MIb.hasOrderedMemoryRef())
1101 3032 : return false;
1102 :
1103 : // Retrieve the base register, offset from the base register and width. Width
1104 : // is the size of memory that is being loaded/stored (e.g. 1, 2, 4, 8). If
1105 : // base registers are identical, and the offset of the lower memory access
1106 : // plus its width does not extend past the offset of the higher memory
1107 : // access, then the two accesses do not overlap.
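 : // For example, 'ldr x0, [x1]' (offset 0, width 8) and 'str x2, [x1, #8]'
 : // satisfy 0 + 8 <= 8, so the two accesses are reported as disjoint.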
1108 11104 : if (getMemOpBaseRegImmOfsWidth(MIa, BaseRegA, OffsetA, WidthA, TRI) &&
1109 4596 : getMemOpBaseRegImmOfsWidth(MIb, BaseRegB, OffsetB, WidthB, TRI)) {
1110 4175 : if (BaseRegA == BaseRegB) {
1111 2415 : int LowOffset = OffsetA < OffsetB ? OffsetA : OffsetB;
1112 2415 : int HighOffset = OffsetA < OffsetB ? OffsetB : OffsetA;
1113 2415 : int LowWidth = (LowOffset == OffsetA) ? WidthA : WidthB;
1114 2415 : if (LowOffset + LowWidth <= HighOffset)
1115 2154 : return true;
1116 : }
1117 : }
1118 : return false;
1119 : }
1120 :
1121 : /// analyzeCompare - For a comparison instruction, return the source registers
1122 : /// in SrcReg and SrcReg2, and the value it compares against in CmpValue.
1123 : /// Return true if the comparison instruction can be analyzed.
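 : /// For example, for a hypothetical 'SUBSWri %d, %a, 42, 0' this returns
 : /// SrcReg = %a, SrcReg2 = 0, CmpMask = ~0 and CmpValue = 1 (non-zero).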
1124 1595 : bool AArch64InstrInfo::analyzeCompare(const MachineInstr &MI, unsigned &SrcReg,
1125 : unsigned &SrcReg2, int &CmpMask,
1126 : int &CmpValue) const {
1127 : // The first operand can be a frame index where we'd normally expect a
1128 : // register.
1129 : assert(MI.getNumOperands() >= 2 && "All AArch64 cmps should have 2 operands");
1130 3190 : if (!MI.getOperand(1).isReg())
1131 : return false;
1132 :
1133 3188 : switch (MI.getOpcode()) {
1134 : default:
1135 : break;
1136 904 : case AArch64::SUBSWrr:
1137 : case AArch64::SUBSWrs:
1138 : case AArch64::SUBSWrx:
1139 : case AArch64::SUBSXrr:
1140 : case AArch64::SUBSXrs:
1141 : case AArch64::SUBSXrx:
1142 : case AArch64::ADDSWrr:
1143 : case AArch64::ADDSWrs:
1144 : case AArch64::ADDSWrx:
1145 : case AArch64::ADDSXrr:
1146 : case AArch64::ADDSXrs:
1147 : case AArch64::ADDSXrx:
1148 : // Replace SUBSWrr with SUBWrr if NZCV is not used.
1149 904 : SrcReg = MI.getOperand(1).getReg();
1150 904 : SrcReg2 = MI.getOperand(2).getReg();
1151 904 : CmpMask = ~0;
1152 904 : CmpValue = 0;
1153 904 : return true;
1154 650 : case AArch64::SUBSWri:
1155 : case AArch64::ADDSWri:
1156 : case AArch64::SUBSXri:
1157 : case AArch64::ADDSXri:
1158 650 : SrcReg = MI.getOperand(1).getReg();
1159 650 : SrcReg2 = 0;
1160 650 : CmpMask = ~0;
1161 : // FIXME: CmpValue is only ever compared against zero, so reduce it to 0 or 1.
1162 650 : CmpValue = MI.getOperand(2).getImm() != 0;
1163 650 : return true;
1164 40 : case AArch64::ANDSWri:
1165 : case AArch64::ANDSXri:
1166 : // ANDS does not use the same encoding scheme as the others xxxS
1167 : // instructions.
1168 40 : SrcReg = MI.getOperand(1).getReg();
1169 40 : SrcReg2 = 0;
1170 40 : CmpMask = ~0;
1171 : // FIXME:The return val type of decodeLogicalImmediate is uint64_t,
1172 : // while the type of CmpValue is int. When converting uint64_t to int,
1173 : // the high 32 bits of uint64_t will be lost.
1174 : // In fact it causes a bug in spec2006-483.xalancbmk
1175 : // CmpValue is only used to compare with zero in OptimizeCompareInstr
1176 40 : CmpValue = AArch64_AM::decodeLogicalImmediate(
1177 40 : MI.getOperand(2).getImm(),
1178 40 : MI.getOpcode() == AArch64::ANDSWri ? 32 : 64) != 0;
1179 40 : return true;
1180 : }
1181 :
1182 : return false;
1183 : }
1184 :
1185 502 : static bool UpdateOperandRegClass(MachineInstr &Instr) {
1186 502 : MachineBasicBlock *MBB = Instr.getParent();
1187 : assert(MBB && "Can't get MachineBasicBlock here");
1188 502 : MachineFunction *MF = MBB->getParent();
1189 : assert(MF && "Can't get MachineFunction here");
1190 502 : const TargetInstrInfo *TII = MF->getSubtarget().getInstrInfo();
1191 502 : const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
1192 502 : MachineRegisterInfo *MRI = &MF->getRegInfo();
1193 :
1194 2224 : for (unsigned OpIdx = 0, EndIdx = Instr.getNumOperands(); OpIdx < EndIdx;
1195 : ++OpIdx) {
1196 1722 : MachineOperand &MO = Instr.getOperand(OpIdx);
1197 : const TargetRegisterClass *OpRegCstraints =
1198 1722 : Instr.getRegClassConstraint(OpIdx, TII, TRI);
1199 :
1200 : // If there's no constraint, there's nothing to do.
1201 1722 : if (!OpRegCstraints)
1202 : continue;
1203 : // If the operand is a frame index, there's nothing to do here.
1204 : // A frame index operand will resolve correctly during PEI.
1205 1345 : if (MO.isFI())
1206 : continue;
1207 :
1208 : assert(MO.isReg() &&
1209 : "Operand has register constraints without being a register!");
1210 :
1211 1343 : unsigned Reg = MO.getReg();
1212 1343 : if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
1213 0 : if (!OpRegCstraints->contains(Reg))
1214 : return false;
1215 2803 : } else if (!OpRegCstraints->hasSubClassEq(MRI->getRegClass(Reg)) &&
1216 117 : !MRI->constrainRegClass(Reg, OpRegCstraints))
1217 : return false;
1218 : }
1219 :
1220 : return true;
1221 : }
1222 :
1223 : /// Return the opcode that does not set flags when possible - otherwise
1224 : /// return the original opcode. The caller is responsible for doing the actual
1225 : /// substitution and legality checking.
1226 1139 : static unsigned convertToNonFlagSettingOpc(const MachineInstr &MI) {
1227 : // Don't convert all compare instructions, because for some the zero register
1228 : // encoding becomes the sp register.
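 : // For instance, in 'SUBSWri %wzr, %x, 0, 0' register number 31 encodes WZR,
 : // but in the non-flag-setting SUBWri the same encoding would mean WSP, so
 : // such instructions keep their flag-setting opcode below.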
1229 : bool MIDefinesZeroReg = false;
1230 2278 : if (MI.definesRegister(AArch64::WZR) || MI.definesRegister(AArch64::XZR))
1231 : MIDefinesZeroReg = true;
1232 :
1233 2278 : switch (MI.getOpcode()) {
1234 0 : default:
1235 0 : return MI.getOpcode();
1236 : case AArch64::ADDSWrr:
1237 : return AArch64::ADDWrr;
1238 0 : case AArch64::ADDSWri:
1239 0 : return MIDefinesZeroReg ? AArch64::ADDSWri : AArch64::ADDWri;
1240 0 : case AArch64::ADDSWrs:
1241 0 : return MIDefinesZeroReg ? AArch64::ADDSWrs : AArch64::ADDWrs;
1242 0 : case AArch64::ADDSWrx:
1243 0 : return AArch64::ADDWrx;
1244 0 : case AArch64::ADDSXrr:
1245 0 : return AArch64::ADDXrr;
1246 0 : case AArch64::ADDSXri:
1247 0 : return MIDefinesZeroReg ? AArch64::ADDSXri : AArch64::ADDXri;
1248 0 : case AArch64::ADDSXrs:
1249 0 : return MIDefinesZeroReg ? AArch64::ADDSXrs : AArch64::ADDXrs;
1250 0 : case AArch64::ADDSXrx:
1251 0 : return AArch64::ADDXrx;
1252 464 : case AArch64::SUBSWrr:
1253 464 : return AArch64::SUBWrr;
1254 137 : case AArch64::SUBSWri:
1255 137 : return MIDefinesZeroReg ? AArch64::SUBSWri : AArch64::SUBWri;
1256 23 : case AArch64::SUBSWrs:
1257 23 : return MIDefinesZeroReg ? AArch64::SUBSWrs : AArch64::SUBWrs;
1258 8 : case AArch64::SUBSWrx:
1259 8 : return AArch64::SUBWrx;
1260 255 : case AArch64::SUBSXrr:
1261 255 : return AArch64::SUBXrr;
1262 228 : case AArch64::SUBSXri:
1263 228 : return MIDefinesZeroReg ? AArch64::SUBSXri : AArch64::SUBXri;
1264 12 : case AArch64::SUBSXrs:
1265 12 : return MIDefinesZeroReg ? AArch64::SUBSXrs : AArch64::SUBXrs;
1266 12 : case AArch64::SUBSXrx:
1267 12 : return AArch64::SUBXrx;
1268 : }
1269 : }
1270 :
1271 : enum AccessKind { AK_Write = 0x01, AK_Read = 0x10, AK_All = 0x11 };
1272 :
1273 : /// True when condition flags are accessed (either by writing or reading)
1274 : /// on the instruction trace starting at From and ending at To.
1275 : ///
1276 : /// Note: If From and To are from different blocks it's assumed CC are accessed
1277 : /// on the path.
1278 35 : static bool areCFlagsAccessedBetweenInstrs(
1279 : MachineBasicBlock::iterator From, MachineBasicBlock::iterator To,
1280 : const TargetRegisterInfo *TRI, const AccessKind AccessToCheck = AK_All) {
1281 : // Early exit if To is at the beginning of the BB.
1282 70 : if (To == To->getParent()->begin())
1283 : return true;
1284 :
1285 : // Check whether the instructions are in the same basic block
1286 : // If not, assume the condition flags might get modified somewhere.
1287 35 : if (To->getParent() != From->getParent())
1288 : return true;
1289 :
1290 : // From must be above To.
1291 : assert(std::find_if(++To.getReverse(), To->getParent()->rend(),
1292 : [From](MachineInstr &MI) {
1293 : return MI.getIterator() == From;
1294 : }) != To->getParent()->rend());
1295 :
1296 : // We iterate backward starting from \p To until we hit \p From.
1297 129 : for (--To; To != From; --To) {
1298 : const MachineInstr &Instr = *To;
1299 :
1300 94 : if (((AccessToCheck & AK_Write) &&
1301 94 : Instr.modifiesRegister(AArch64::NZCV, TRI)) ||
1302 188 : ((AccessToCheck & AK_Read) && Instr.readsRegister(AArch64::NZCV, TRI)))
1303 0 : return true;
1304 : }
1305 : return false;
1306 : }
1307 :
1308 : /// Try to optimize a compare instruction. A compare instruction is an
1309 : /// instruction which produces AArch64::NZCV. It is a true compare
1310 : /// instruction when there are no uses of its destination
1311 : /// register.
1312 : ///
1313 : /// The following steps are tried in order:
1314 : /// 1. Convert CmpInstr into an unconditional version.
1315 : /// 2. Remove CmpInstr if above there is an instruction producing a needed
1316 : /// condition code or an instruction which can be converted into such an
1317 : /// instruction.
1318 : /// Only comparison with zero is supported.
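 : ///
 : /// A sketch of step 2 with hypothetical virtual registers and block names:
 : ///   %x = SUBWrr %a, %b
 : ///   %d = SUBSWri %x, 0, 0      ; compare %x with zero, %d otherwise unused
 : ///   Bcc EQ, %bb.target
 : /// can become
 : ///   %x = SUBSWrr %a, %b
 : ///   Bcc EQ, %bb.target
 : /// because the flag-setting SUBSWrr already produces the Z flag the branch
 : /// needs, making the separate compare redundant.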
1319 1588 : bool AArch64InstrInfo::optimizeCompareInstr(
1320 : MachineInstr &CmpInstr, unsigned SrcReg, unsigned SrcReg2, int CmpMask,
1321 : int CmpValue, const MachineRegisterInfo *MRI) const {
1322 : assert(CmpInstr.getParent());
1323 : assert(MRI);
1324 :
1325 : // Replace SUBSWrr with SUBWrr if NZCV is not used.
1326 1588 : int DeadNZCVIdx = CmpInstr.findRegisterDefOperandIdx(AArch64::NZCV, true);
1327 1588 : if (DeadNZCVIdx != -1) {
1328 988 : if (CmpInstr.definesRegister(AArch64::WZR) ||
1329 : CmpInstr.definesRegister(AArch64::XZR)) {
1330 0 : CmpInstr.eraseFromParent();
1331 0 : return true;
1332 : }
1333 494 : unsigned Opc = CmpInstr.getOpcode();
1334 494 : unsigned NewOpc = convertToNonFlagSettingOpc(CmpInstr);
1335 494 : if (NewOpc == Opc)
1336 : return false;
1337 494 : const MCInstrDesc &MCID = get(NewOpc);
1338 : CmpInstr.setDesc(MCID);
1339 494 : CmpInstr.RemoveOperand(DeadNZCVIdx);
1340 494 : bool succeeded = UpdateOperandRegClass(CmpInstr);
1341 : (void)succeeded;
1342 : assert(succeeded && "Some operands reg class are incompatible!");
1343 494 : return true;
1344 : }
1345 :
1346 : // Continue only if we have a "ri" where immediate is zero.
1347 : // FIXME: CmpValue has already been converted to 0 or 1 in the
1348 : // analyzeCompare function.
1349 : assert((CmpValue == 0 || CmpValue == 1) && "CmpValue must be 0 or 1!");
1350 1094 : if (CmpValue != 0 || SrcReg2 != 0)
1351 : return false;
1352 :
1353 : // CmpInstr is a Compare instruction if destination register is not used.
1354 146 : if (!MRI->use_nodbg_empty(CmpInstr.getOperand(0).getReg()))
1355 : return false;
1356 :
1357 137 : return substituteCmpToZero(CmpInstr, SrcReg, MRI);
1358 : }
1359 :
1360 : /// Get the opcode of the S (flag-setting) version of Instr.
1361 : /// If Instr is already an S version, its opcode is returned.
1362 : /// AArch64::INSTRUCTION_LIST_END is returned if Instr does not have an S
1363 : /// version or we are not interested in it.
1364 171 : static unsigned sForm(MachineInstr &Instr) {
1365 342 : switch (Instr.getOpcode()) {
1366 : default:
1367 : return AArch64::INSTRUCTION_LIST_END;
1368 :
1369 0 : case AArch64::ADDSWrr:
1370 : case AArch64::ADDSWri:
1371 : case AArch64::ADDSXrr:
1372 : case AArch64::ADDSXri:
1373 : case AArch64::SUBSWrr:
1374 : case AArch64::SUBSWri:
1375 : case AArch64::SUBSXrr:
1376 : case AArch64::SUBSXri:
1377 0 : return Instr.getOpcode();
1378 :
1379 0 : case AArch64::ADDWrr:
1380 0 : return AArch64::ADDSWrr;
1381 2 : case AArch64::ADDWri:
1382 2 : return AArch64::ADDSWri;
1383 0 : case AArch64::ADDXrr:
1384 0 : return AArch64::ADDSXrr;
1385 4 : case AArch64::ADDXri:
1386 4 : return AArch64::ADDSXri;
1387 0 : case AArch64::ADCWr:
1388 0 : return AArch64::ADCSWr;
1389 0 : case AArch64::ADCXr:
1390 0 : return AArch64::ADCSXr;
1391 12 : case AArch64::SUBWrr:
1392 12 : return AArch64::SUBSWrr;
1393 0 : case AArch64::SUBWri:
1394 0 : return AArch64::SUBSWri;
1395 4 : case AArch64::SUBXrr:
1396 4 : return AArch64::SUBSXrr;
1397 44 : case AArch64::SUBXri:
1398 44 : return AArch64::SUBSXri;
1399 0 : case AArch64::SBCWr:
1400 0 : return AArch64::SBCSWr;
1401 0 : case AArch64::SBCXr:
1402 0 : return AArch64::SBCSXr;
1403 2 : case AArch64::ANDWri:
1404 2 : return AArch64::ANDSWri;
1405 0 : case AArch64::ANDXri:
1406 0 : return AArch64::ANDSXri;
1407 : }
1408 : }
1409 :
1410 : /// Check if AArch64::NZCV should be alive in successors of MBB.
1411 34 : static bool areCFlagsAliveInSuccessors(MachineBasicBlock *MBB) {
1412 76 : for (auto *BB : MBB->successors())
1413 42 : if (BB->isLiveIn(AArch64::NZCV))
1414 : return true;
1415 : return false;
1416 : }
1417 :
1418 : namespace {
1419 :
1420 : struct UsedNZCV {
1421 : bool N = false;
1422 : bool Z = false;
1423 : bool C = false;
1424 : bool V = false;
1425 :
1426 : UsedNZCV() = default;
1427 :
1428 : UsedNZCV &operator|=(const UsedNZCV &UsedFlags) {
1429 : this->N |= UsedFlags.N;
1430 : this->Z |= UsedFlags.Z;
1431 38 : this->C |= UsedFlags.C;
1432 38 : this->V |= UsedFlags.V;
1433 : return *this;
1434 : }
1435 : };
1436 :
1437 : } // end anonymous namespace
1438 :
1439 : /// Find a condition code used by the instruction.
1440 : /// Returns AArch64CC::Invalid if either the instruction does not use condition
1441 : /// codes or we don't optimize CmpInstr in the presence of such instructions.
1442 38 : static AArch64CC::CondCode findCondCodeUsedByInstr(const MachineInstr &Instr) {
1443 76 : switch (Instr.getOpcode()) {
1444 : default:
1445 : return AArch64CC::Invalid;
1446 :
1447 20 : case AArch64::Bcc: {
1448 20 : int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1449 : assert(Idx >= 2);
1450 40 : return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 2).getImm());
1451 : }
1452 :
1453 18 : case AArch64::CSINVWr:
1454 : case AArch64::CSINVXr:
1455 : case AArch64::CSINCWr:
1456 : case AArch64::CSINCXr:
1457 : case AArch64::CSELWr:
1458 : case AArch64::CSELXr:
1459 : case AArch64::CSNEGWr:
1460 : case AArch64::CSNEGXr:
1461 : case AArch64::FCSELSrrr:
1462 : case AArch64::FCSELDrrr: {
1463 18 : int Idx = Instr.findRegisterUseOperandIdx(AArch64::NZCV);
1464 : assert(Idx >= 1);
1465 36 : return static_cast<AArch64CC::CondCode>(Instr.getOperand(Idx - 1).getImm());
1466 : }
1467 : }
1468 : }
1469 :
1470 : static UsedNZCV getUsedNZCV(AArch64CC::CondCode CC) {
1471 : assert(CC != AArch64CC::Invalid);
1472 : UsedNZCV UsedFlags;
1473 : switch (CC) {
1474 : default:
1475 : break;
1476 :
1477 : case AArch64CC::EQ: // Z set
1478 : case AArch64CC::NE: // Z clear
1479 : UsedFlags.Z = true;
1480 : break;
1481 :
1482 : case AArch64CC::HI: // Z clear and C set
1483 : case AArch64CC::LS: // Z set or C clear
1484 : UsedFlags.Z = true;
1485 : LLVM_FALLTHROUGH;
1486 : case AArch64CC::HS: // C set
1487 : case AArch64CC::LO: // C clear
1488 : UsedFlags.C = true;
1489 : break;
1490 :
1491 : case AArch64CC::MI: // N set
1492 : case AArch64CC::PL: // N clear
1493 : UsedFlags.N = true;
1494 : break;
1495 :
1496 : case AArch64CC::VS: // V set
1497 : case AArch64CC::VC: // V clear
1498 : UsedFlags.V = true;
1499 : break;
1500 :
1501 : case AArch64CC::GT: // Z clear, N and V the same
1502 : case AArch64CC::LE: // Z set, N and V differ
1503 : UsedFlags.Z = true;
1504 : LLVM_FALLTHROUGH;
1505 : case AArch64CC::GE: // N and V the same
1506 : case AArch64CC::LT: // N and V differ
1507 : UsedFlags.N = true;
1508 : UsedFlags.V = true;
1509 : break;
1510 : }
1511 : return UsedFlags;
1512 : }
1513 :
1514 : static bool isADDSRegImm(unsigned Opcode) {
1515 34 : return Opcode == AArch64::ADDSWri || Opcode == AArch64::ADDSXri;
1516 : }
1517 :
1518 : static bool isSUBSRegImm(unsigned Opcode) {
1519 34 : return Opcode == AArch64::SUBSWri || Opcode == AArch64::SUBSXri;
1520 : }
1521 :
1522 : /// Check if CmpInstr can be substituted by MI.
1523 : ///
1524 : /// CmpInstr can be substituted:
1525 : /// - CmpInstr is either 'ADDS %vreg, 0' or 'SUBS %vreg, 0'
1526 : /// - and, MI and CmpInstr are from the same MachineBB
1527 : /// - and, condition flags are not alive in successors of the CmpInstr parent
1528 : /// - and, if MI opcode is the S form there must be no defs of flags between
1529 : /// MI and CmpInstr
1530 : /// or if MI opcode is not the S form there must be neither defs of flags
1531 : /// nor uses of flags between MI and CmpInstr.
1532 : /// - and C/V flags are not used after CmpInstr
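 : ///
 : /// The C/V restriction exists because the N and Z flags produced by MI match
 : /// those of a compare of its result against zero, while C and V reflect MI's
 : /// own operation rather than that compare, so they cannot be reused safely.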
1533 34 : static bool canInstrSubstituteCmpInstr(MachineInstr *MI, MachineInstr *CmpInstr,
1534 : const TargetRegisterInfo *TRI) {
1535 : assert(MI);
1536 : assert(sForm(*MI) != AArch64::INSTRUCTION_LIST_END);
1537 : assert(CmpInstr);
1538 :
1539 34 : const unsigned CmpOpcode = CmpInstr->getOpcode();
1540 34 : if (!isADDSRegImm(CmpOpcode) && !isSUBSRegImm(CmpOpcode))
1541 : return false;
1542 :
1543 34 : if (MI->getParent() != CmpInstr->getParent())
1544 : return false;
1545 :
1546 34 : if (areCFlagsAliveInSuccessors(CmpInstr->getParent()))
1547 : return false;
1548 :
1549 : AccessKind AccessToCheck = AK_Write;
1550 68 : if (sForm(*MI) != MI->getOpcode())
1551 : AccessToCheck = AK_All;
1552 34 : if (areCFlagsAccessedBetweenInstrs(MI, CmpInstr, TRI, AccessToCheck))
1553 : return false;
1554 :
1555 : UsedNZCV NZCVUsedAfterCmp;
1556 34 : for (auto I = std::next(CmpInstr->getIterator()),
1557 34 : E = CmpInstr->getParent()->instr_end();
1558 128 : I != E; ++I) {
1559 : const MachineInstr &Instr = *I;
1560 95 : if (Instr.readsRegister(AArch64::NZCV, TRI)) {
1561 38 : AArch64CC::CondCode CC = findCondCodeUsedByInstr(Instr);
1562 38 : if (CC == AArch64CC::Invalid) // Unsupported conditional instruction
1563 : return false;
1564 : NZCVUsedAfterCmp |= getUsedNZCV(CC);
1565 : }
1566 :
1567 95 : if (Instr.modifiesRegister(AArch64::NZCV, TRI))
1568 : break;
1569 : }
1570 :
1571 34 : return !NZCVUsedAfterCmp.C && !NZCVUsedAfterCmp.V;
1572 : }
1573 :
1574 : /// Substitute an instruction comparing to zero with another instruction
1575 : /// which produces needed condition flags.
1576 : ///
1577 : /// Return true on success.
1578 137 : bool AArch64InstrInfo::substituteCmpToZero(
1579 : MachineInstr &CmpInstr, unsigned SrcReg,
1580 : const MachineRegisterInfo *MRI) const {
1581 : assert(MRI);
1582 : // Get the unique definition of SrcReg.
1583 137 : MachineInstr *MI = MRI->getUniqueVRegDef(SrcReg);
1584 137 : if (!MI)
1585 : return false;
1586 :
1587 : const TargetRegisterInfo *TRI = &getRegisterInfo();
1588 :
1589 137 : unsigned NewOpc = sForm(*MI);
1590 137 : if (NewOpc == AArch64::INSTRUCTION_LIST_END)
1591 : return false;
1592 :
1593 34 : if (!canInstrSubstituteCmpInstr(MI, &CmpInstr, TRI))
1594 : return false;
1595 :
1596 : // Update the instruction to set NZCV.
1597 8 : MI->setDesc(get(NewOpc));
1598 8 : CmpInstr.eraseFromParent();
1599 8 : bool succeeded = UpdateOperandRegClass(*MI);
1600 : (void)succeeded;
1601 : assert(succeeded && "Some operands reg class are incompatible!");
1602 8 : MI->addRegisterDefined(AArch64::NZCV, TRI);
1603 8 : return true;
1604 : }
1605 :
1606 11126 : bool AArch64InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
1607 22252 : if (MI.getOpcode() != TargetOpcode::LOAD_STACK_GUARD)
1608 : return false;
1609 :
1610 21 : MachineBasicBlock &MBB = *MI.getParent();
1611 : DebugLoc DL = MI.getDebugLoc();
1612 21 : unsigned Reg = MI.getOperand(0).getReg();
1613 : const GlobalValue *GV =
1614 21 : cast<GlobalValue>((*MI.memoperands_begin())->getValue());
1615 21 : const TargetMachine &TM = MBB.getParent()->getTarget();
1616 21 : unsigned char OpFlags = Subtarget.ClassifyGlobalReference(GV, TM);
1617 : const unsigned char MO_NC = AArch64II::MO_NC;
1618 :
1619 21 : if ((OpFlags & AArch64II::MO_GOT) != 0) {
1620 30 : BuildMI(MBB, MI, DL, get(AArch64::LOADgot), Reg)
1621 : .addGlobalAddress(GV, 0, OpFlags);
1622 45 : BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1623 15 : .addReg(Reg, RegState::Kill)
1624 : .addImm(0)
1625 15 : .addMemOperand(*MI.memoperands_begin());
1626 6 : } else if (TM.getCodeModel() == CodeModel::Large) {
1627 4 : BuildMI(MBB, MI, DL, get(AArch64::MOVZXi), Reg)
1628 : .addGlobalAddress(GV, 0, AArch64II::MO_G0 | MO_NC)
1629 : .addImm(0);
1630 4 : BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1631 2 : .addReg(Reg, RegState::Kill)
1632 : .addGlobalAddress(GV, 0, AArch64II::MO_G1 | MO_NC)
1633 : .addImm(16);
1634 4 : BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1635 2 : .addReg(Reg, RegState::Kill)
1636 : .addGlobalAddress(GV, 0, AArch64II::MO_G2 | MO_NC)
1637 : .addImm(32);
1638 4 : BuildMI(MBB, MI, DL, get(AArch64::MOVKXi), Reg)
1639 2 : .addReg(Reg, RegState::Kill)
1640 : .addGlobalAddress(GV, 0, AArch64II::MO_G3)
1641 : .addImm(48);
1642 6 : BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1643 2 : .addReg(Reg, RegState::Kill)
1644 : .addImm(0)
1645 2 : .addMemOperand(*MI.memoperands_begin());
1646 4 : } else if (TM.getCodeModel() == CodeModel::Tiny) {
1647 0 : BuildMI(MBB, MI, DL, get(AArch64::ADR), Reg)
1648 : .addGlobalAddress(GV, 0, OpFlags);
1649 : } else {
1650 8 : BuildMI(MBB, MI, DL, get(AArch64::ADRP), Reg)
1651 4 : .addGlobalAddress(GV, 0, OpFlags | AArch64II::MO_PAGE);
1652 4 : unsigned char LoFlags = OpFlags | AArch64II::MO_PAGEOFF | MO_NC;
1653 12 : BuildMI(MBB, MI, DL, get(AArch64::LDRXui), Reg)
1654 4 : .addReg(Reg, RegState::Kill)
1655 : .addGlobalAddress(GV, 0, LoFlags)
1656 4 : .addMemOperand(*MI.memoperands_begin());
1657 : }
1658 :
1659 21 : MBB.erase(MI);
1660 :
1661 : return true;
1662 : }
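// Illustrative sketch (not from the original file): for the GOT case above, a
// LOAD_STACK_GUARD pseudo referencing the guard global (typically
// __stack_chk_guard on ELF targets) expands to roughly
//   adrp x8, :got:__stack_chk_guard
//   ldr  x8, [x8, :got_lo12:__stack_chk_guard]   ; the LOADgot piece
//   ldr  x8, [x8]                                ; the guard value itself
// (register choice is illustrative); the Large code model instead builds the
// address with a MOVZ/MOVK chain before the final load.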
1663 :
1664 : /// Return true if this instruction has a shifted register operand with a
1664 : /// non-zero shift amount.
1665 401 : bool AArch64InstrInfo::hasShiftedReg(const MachineInstr &MI) {
1666 802 : switch (MI.getOpcode()) {
1667 : default:
1668 : break;
1669 337 : case AArch64::ADDSWrs:
1670 : case AArch64::ADDSXrs:
1671 : case AArch64::ADDWrs:
1672 : case AArch64::ADDXrs:
1673 : case AArch64::ANDSWrs:
1674 : case AArch64::ANDSXrs:
1675 : case AArch64::ANDWrs:
1676 : case AArch64::ANDXrs:
1677 : case AArch64::BICSWrs:
1678 : case AArch64::BICSXrs:
1679 : case AArch64::BICWrs:
1680 : case AArch64::BICXrs:
1681 : case AArch64::EONWrs:
1682 : case AArch64::EONXrs:
1683 : case AArch64::EORWrs:
1684 : case AArch64::EORXrs:
1685 : case AArch64::ORNWrs:
1686 : case AArch64::ORNXrs:
1687 : case AArch64::ORRWrs:
1688 : case AArch64::ORRXrs:
1689 : case AArch64::SUBSWrs:
1690 : case AArch64::SUBSXrs:
1691 : case AArch64::SUBWrs:
1692 : case AArch64::SUBXrs:
1693 674 : if (MI.getOperand(3).isImm()) {
1694 337 : unsigned val = MI.getOperand(3).getImm();
1695 337 : return (val != 0);
1696 0 : }
1697 : break;
1698 : }
1699 : return false;
1700 : }
1701 :
1702 : /// Return true if this instruction has an extended register operand with a
1702 : /// non-zero extend immediate.
1703 15 : bool AArch64InstrInfo::hasExtendedReg(const MachineInstr &MI) {
1704 30 : switch (MI.getOpcode()) {
1705 : default:
1706 : break;
1707 15 : case AArch64::ADDSWrx:
1708 : case AArch64::ADDSXrx:
1709 : case AArch64::ADDSXrx64:
1710 : case AArch64::ADDWrx:
1711 : case AArch64::ADDXrx:
1712 : case AArch64::ADDXrx64:
1713 : case AArch64::SUBSWrx:
1714 : case AArch64::SUBSXrx:
1715 : case AArch64::SUBSXrx64:
1716 : case AArch64::SUBWrx:
1717 : case AArch64::SUBXrx:
1718 : case AArch64::SUBXrx64:
1719 30 : if (MI.getOperand(3).isImm()) {
1720 15 : unsigned val = MI.getOperand(3).getImm();
1721 15 : return (val != 0);
1722 0 : }
1723 : break;
1724 : }
1725 :
1726 : return false;
1727 : }
1728 :
1729 : // Return true if this instruction simply sets its single destination register
1730 : // to zero. This is equivalent to a register rename of the zero-register.
1731 90 : bool AArch64InstrInfo::isGPRZero(const MachineInstr &MI) {
1732 180 : switch (MI.getOpcode()) {
1733 : default:
1734 : break;
1735 46 : case AArch64::MOVZWi:
1736 : case AArch64::MOVZXi: // movz Rd, #0 (LSL #0)
1737 92 : if (MI.getOperand(1).isImm() && MI.getOperand(1).getImm() == 0) {
1738 : assert(MI.getDesc().getNumOperands() == 3 &&
1739 : MI.getOperand(2).getImm() == 0 && "invalid MOVZi operands");
1740 0 : return true;
1741 : }
1742 : break;
1743 16 : case AArch64::ANDWri: // and Rd, Rzr, #imm
1744 16 : return MI.getOperand(1).getReg() == AArch64::WZR;
1745 28 : case AArch64::ANDXri:
1746 28 : return MI.getOperand(1).getReg() == AArch64::XZR;
1747 0 : case TargetOpcode::COPY:
1748 0 : return MI.getOperand(1).getReg() == AArch64::WZR;
1749 : }
1750 : return false;
1751 : }
1752 :
1753 : // Return true if this instruction simply renames a general register without
1754 : // modifying bits.
1755 9925 : bool AArch64InstrInfo::isGPRCopy(const MachineInstr &MI) {
1756 19850 : switch (MI.getOpcode()) {
1757 : default:
1758 : break;
1759 9719 : case TargetOpcode::COPY: {
1760 : // GPR32 copies will be lowered to ORRXrs
1761 9719 : unsigned DstReg = MI.getOperand(0).getReg();
1762 11607 : return (AArch64::GPR32RegClass.contains(DstReg) ||
1763 9055 : AArch64::GPR64RegClass.contains(DstReg));
1764 : }
1765 0 : case AArch64::ORRXrs: // orr Xd, Xzr, Xm (LSL #0)
1766 0 : if (MI.getOperand(1).getReg() == AArch64::XZR) {
1767 : assert(MI.getDesc().getNumOperands() == 4 &&
1768 : MI.getOperand(3).getImm() == 0 && "invalid ORRrs operands");
1769 0 : return true;
1770 : }
1771 : break;
1772 0 : case AArch64::ADDXri: // add Xd, Xn, #0 (LSL #0)
1773 0 : if (MI.getOperand(2).getImm() == 0) {
1774 : assert(MI.getDesc().getNumOperands() == 4 &&
1775 : MI.getOperand(3).getImm() == 0 && "invalid ADDXri operands");
1776 0 : return true;
1777 : }
1778 : break;
1779 : }
1780 : return false;
1781 : }
1782 :
1783 : // Return true if this instruction simply renames a floating-point register
1784 : // without modifying bits.
1785 8500 : bool AArch64InstrInfo::isFPRCopy(const MachineInstr &MI) {
1786 17000 : switch (MI.getOpcode()) {
1787 : default:
1788 : break;
1789 8294 : case TargetOpcode::COPY: {
1790 : // FPR64 copies will be lowered to ORR.16b
1791 8294 : unsigned DstReg = MI.getOperand(0).getReg();
1792 9106 : return (AArch64::FPR64RegClass.contains(DstReg) ||
1793 7979 : AArch64::FPR128RegClass.contains(DstReg));
1794 : }
1795 0 : case AArch64::ORRv16i8:
1796 0 : if (MI.getOperand(1).getReg() == MI.getOperand(2).getReg()) {
1797 : assert(MI.getDesc().getNumOperands() == 3 && MI.getOperand(0).isReg() &&
1798 : "invalid ORRv16i8 operands");
1799 0 : return true;
1800 : }
1801 : break;
1802 : }
1803 : return false;
1804 : }
1805 :
1806 10950 : unsigned AArch64InstrInfo::isLoadFromStackSlot(const MachineInstr &MI,
1807 : int &FrameIndex) const {
1808 21900 : switch (MI.getOpcode()) {
1809 : default:
1810 : break;
1811 806 : case AArch64::LDRWui:
1812 : case AArch64::LDRXui:
1813 : case AArch64::LDRBui:
1814 : case AArch64::LDRHui:
1815 : case AArch64::LDRSui:
1816 : case AArch64::LDRDui:
1817 : case AArch64::LDRQui:
1818 806 : if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1819 1211 : MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1820 405 : FrameIndex = MI.getOperand(1).getIndex();
1821 405 : return MI.getOperand(0).getReg();
1822 : }
1823 : break;
1824 : }
1825 :
1826 : return 0;
1827 : }
1828 :
1829 5253 : unsigned AArch64InstrInfo::isStoreToStackSlot(const MachineInstr &MI,
1830 : int &FrameIndex) const {
1831 5253 : switch (MI.getOpcode()) {
1832 : default:
1833 : break;
1834 383 : case AArch64::STRWui:
1835 : case AArch64::STRXui:
1836 : case AArch64::STRBui:
1837 : case AArch64::STRHui:
1838 : case AArch64::STRSui:
1839 : case AArch64::STRDui:
1840 : case AArch64::STRQui:
1841 383 : if (MI.getOperand(0).getSubReg() == 0 && MI.getOperand(1).isFI() &&
1842 463 : MI.getOperand(2).isImm() && MI.getOperand(2).getImm() == 0) {
1843 71 : FrameIndex = MI.getOperand(1).getIndex();
1844 71 : return MI.getOperand(0).getReg();
1845 : }
1846 : break;
1847 : }
1848 : return 0;
1849 : }
1850 :
1851 : /// Return true if this load/store scales or extends its register offset.
1852 : /// This refers to scaling a dynamic index as opposed to scaled immediates.
1853 : /// MI should be a memory op that allows scaled addressing.
1854 685 : bool AArch64InstrInfo::isScaledAddr(const MachineInstr &MI) {
1855 1370 : switch (MI.getOpcode()) {
1856 : default:
1857 : break;
1858 685 : case AArch64::LDRBBroW:
1859 : case AArch64::LDRBroW:
1860 : case AArch64::LDRDroW:
1861 : case AArch64::LDRHHroW:
1862 : case AArch64::LDRHroW:
1863 : case AArch64::LDRQroW:
1864 : case AArch64::LDRSBWroW:
1865 : case AArch64::LDRSBXroW:
1866 : case AArch64::LDRSHWroW:
1867 : case AArch64::LDRSHXroW:
1868 : case AArch64::LDRSWroW:
1869 : case AArch64::LDRSroW:
1870 : case AArch64::LDRWroW:
1871 : case AArch64::LDRXroW:
1872 : case AArch64::STRBBroW:
1873 : case AArch64::STRBroW:
1874 : case AArch64::STRDroW:
1875 : case AArch64::STRHHroW:
1876 : case AArch64::STRHroW:
1877 : case AArch64::STRQroW:
1878 : case AArch64::STRSroW:
1879 : case AArch64::STRWroW:
1880 : case AArch64::STRXroW:
1881 : case AArch64::LDRBBroX:
1882 : case AArch64::LDRBroX:
1883 : case AArch64::LDRDroX:
1884 : case AArch64::LDRHHroX:
1885 : case AArch64::LDRHroX:
1886 : case AArch64::LDRQroX:
1887 : case AArch64::LDRSBWroX:
1888 : case AArch64::LDRSBXroX:
1889 : case AArch64::LDRSHWroX:
1890 : case AArch64::LDRSHXroX:
1891 : case AArch64::LDRSWroX:
1892 : case AArch64::LDRSroX:
1893 : case AArch64::LDRWroX:
1894 : case AArch64::LDRXroX:
1895 : case AArch64::STRBBroX:
1896 : case AArch64::STRBroX:
1897 : case AArch64::STRDroX:
1898 : case AArch64::STRHHroX:
1899 : case AArch64::STRHroX:
1900 : case AArch64::STRQroX:
1901 : case AArch64::STRSroX:
1902 : case AArch64::STRWroX:
1903 : case AArch64::STRXroX:
1904 :
1905 685 : unsigned Val = MI.getOperand(3).getImm();
1906 : AArch64_AM::ShiftExtendType ExtType = AArch64_AM::getMemExtendType(Val);
1907 685 : return (ExtType != AArch64_AM::UXTX) || AArch64_AM::getMemDoShift(Val);
1908 : }
1909 : return false;
1910 : }
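// Illustrative sketch (not from the original file): "ldr x0, [x1, x2, lsl #3]"
// and "ldr x0, [x1, w2, sxtw]" are scaled/extended register-offset accesses and
// return true here, while "ldr x0, [x1, x2]" uses a plain UXTX offset with no
// shift and returns false.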
1911 :
1912 : /// Check all MachineMemOperands for a hint to suppress pairing.
1913 25617 : bool AArch64InstrInfo::isLdStPairSuppressed(const MachineInstr &MI) {
1914 25617 : return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1915 0 : return MMO->getFlags() & MOSuppressPair;
1916 25617 : });
1917 : }
1918 :
1919 : /// Set a flag on the first MachineMemOperand to suppress pairing.
1920 9 : void AArch64InstrInfo::suppressLdStPair(MachineInstr &MI) {
1921 9 : if (MI.memoperands_empty())
1922 : return;
1923 9 : (*MI.memoperands_begin())->setFlags(MOSuppressPair);
1924 : }
1925 :
1926 : /// Check all MachineMemOperands for a hint that the load/store is strided.
1927 133 : bool AArch64InstrInfo::isStridedAccess(const MachineInstr &MI) {
1928 133 : return llvm::any_of(MI.memoperands(), [](MachineMemOperand *MMO) {
1929 0 : return MMO->getFlags() & MOStridedAccess;
1930 133 : });
1931 : }
1932 :
1933 40183 : bool AArch64InstrInfo::isUnscaledLdSt(unsigned Opc) {
1934 40183 : switch (Opc) {
1935 : default:
1936 : return false;
1937 1159 : case AArch64::STURSi:
1938 : case AArch64::STURDi:
1939 : case AArch64::STURQi:
1940 : case AArch64::STURBBi:
1941 : case AArch64::STURHHi:
1942 : case AArch64::STURWi:
1943 : case AArch64::STURXi:
1944 : case AArch64::LDURSi:
1945 : case AArch64::LDURDi:
1946 : case AArch64::LDURQi:
1947 : case AArch64::LDURWi:
1948 : case AArch64::LDURXi:
1949 : case AArch64::LDURSWi:
1950 : case AArch64::LDURHHi:
1951 : case AArch64::LDURBBi:
1952 : case AArch64::LDURSBWi:
1953 : case AArch64::LDURSHWi:
1954 1159 : return true;
1955 : }
1956 : }
1957 :
1958 76204 : bool AArch64InstrInfo::isPairableLdStInst(const MachineInstr &MI) {
1959 152408 : switch (MI.getOpcode()) {
1960 : default:
1961 : return false;
1962 : // Scaled instructions.
1963 12457 : case AArch64::STRSui:
1964 : case AArch64::STRDui:
1965 : case AArch64::STRQui:
1966 : case AArch64::STRXui:
1967 : case AArch64::STRWui:
1968 : case AArch64::LDRSui:
1969 : case AArch64::LDRDui:
1970 : case AArch64::LDRQui:
1971 : case AArch64::LDRXui:
1972 : case AArch64::LDRWui:
1973 : case AArch64::LDRSWui:
1974 : // Unscaled instructions.
1975 : case AArch64::STURSi:
1976 : case AArch64::STURDi:
1977 : case AArch64::STURQi:
1978 : case AArch64::STURWi:
1979 : case AArch64::STURXi:
1980 : case AArch64::LDURSi:
1981 : case AArch64::LDURDi:
1982 : case AArch64::LDURQi:
1983 : case AArch64::LDURWi:
1984 : case AArch64::LDURXi:
1985 : case AArch64::LDURSWi:
1986 12457 : return true;
1987 : }
1988 : }
1989 :
1990 23 : unsigned AArch64InstrInfo::convertToFlagSettingOpc(unsigned Opc,
1991 : bool &Is64Bit) {
1992 23 : switch (Opc) {
1993 0 : default:
1994 0 : llvm_unreachable("Opcode has no flag setting equivalent!");
1995 : // 32-bit cases:
1996 3 : case AArch64::ADDWri:
1997 3 : Is64Bit = false;
1998 3 : return AArch64::ADDSWri;
1999 4 : case AArch64::ADDWrr:
2000 4 : Is64Bit = false;
2001 4 : return AArch64::ADDSWrr;
2002 0 : case AArch64::ADDWrs:
2003 0 : Is64Bit = false;
2004 0 : return AArch64::ADDSWrs;
2005 0 : case AArch64::ADDWrx:
2006 0 : Is64Bit = false;
2007 0 : return AArch64::ADDSWrx;
2008 3 : case AArch64::ANDWri:
2009 3 : Is64Bit = false;
2010 3 : return AArch64::ANDSWri;
2011 0 : case AArch64::ANDWrr:
2012 0 : Is64Bit = false;
2013 0 : return AArch64::ANDSWrr;
2014 0 : case AArch64::ANDWrs:
2015 0 : Is64Bit = false;
2016 0 : return AArch64::ANDSWrs;
2017 1 : case AArch64::BICWrr:
2018 1 : Is64Bit = false;
2019 1 : return AArch64::BICSWrr;
2020 0 : case AArch64::BICWrs:
2021 0 : Is64Bit = false;
2022 0 : return AArch64::BICSWrs;
2023 0 : case AArch64::SUBWri:
2024 0 : Is64Bit = false;
2025 0 : return AArch64::SUBSWri;
2026 0 : case AArch64::SUBWrr:
2027 0 : Is64Bit = false;
2028 0 : return AArch64::SUBSWrr;
2029 0 : case AArch64::SUBWrs:
2030 0 : Is64Bit = false;
2031 0 : return AArch64::SUBSWrs;
2032 0 : case AArch64::SUBWrx:
2033 0 : Is64Bit = false;
2034 0 : return AArch64::SUBSWrx;
2035 : // 64-bit cases:
2036 10 : case AArch64::ADDXri:
2037 10 : Is64Bit = true;
2038 10 : return AArch64::ADDSXri;
2039 1 : case AArch64::ADDXrr:
2040 1 : Is64Bit = true;
2041 1 : return AArch64::ADDSXrr;
2042 0 : case AArch64::ADDXrs:
2043 0 : Is64Bit = true;
2044 0 : return AArch64::ADDSXrs;
2045 0 : case AArch64::ADDXrx:
2046 0 : Is64Bit = true;
2047 0 : return AArch64::ADDSXrx;
2048 1 : case AArch64::ANDXri:
2049 1 : Is64Bit = true;
2050 1 : return AArch64::ANDSXri;
2051 0 : case AArch64::ANDXrr:
2052 0 : Is64Bit = true;
2053 0 : return AArch64::ANDSXrr;
2054 0 : case AArch64::ANDXrs:
2055 0 : Is64Bit = true;
2056 0 : return AArch64::ANDSXrs;
2057 0 : case AArch64::BICXrr:
2058 0 : Is64Bit = true;
2059 0 : return AArch64::BICSXrr;
2060 0 : case AArch64::BICXrs:
2061 0 : Is64Bit = true;
2062 0 : return AArch64::BICSXrs;
2063 0 : case AArch64::SUBXri:
2064 0 : Is64Bit = true;
2065 0 : return AArch64::SUBSXri;
2066 0 : case AArch64::SUBXrr:
2067 0 : Is64Bit = true;
2068 0 : return AArch64::SUBSXrr;
2069 0 : case AArch64::SUBXrs:
2070 0 : Is64Bit = true;
2071 0 : return AArch64::SUBSXrs;
2072 0 : case AArch64::SUBXrx:
2073 0 : Is64Bit = true;
2074 0 : return AArch64::SUBSXrx;
2075 : }
2076 : }
2077 :
2078 : // Is this a candidate for ld/st merging or pairing? For example, we don't
2079 : // touch volatiles or load/stores that have a hint to avoid pair formation.
2080 12380 : bool AArch64InstrInfo::isCandidateToMergeOrPair(MachineInstr &MI) const {
2081 : // If this is a volatile load/store, don't mess with it.
2082 12380 : if (MI.hasOrderedMemoryRef())
2083 : return false;
2084 :
2085 : // Make sure this is a reg+imm (as opposed to an address reloc).
2086 : assert(MI.getOperand(1).isReg() && "Expected a reg operand.");
2087 17158 : if (!MI.getOperand(2).isImm())
2088 : return false;
2089 :
2090 : // Can't merge/pair if the instruction modifies the base register.
2091 : // e.g., ldr x0, [x0]
2092 7863 : unsigned BaseReg = MI.getOperand(1).getReg();
2093 : const TargetRegisterInfo *TRI = &getRegisterInfo();
2094 7863 : if (MI.modifiesRegister(BaseReg, TRI))
2095 : return false;
2096 :
2097 : // Check if this load/store has a hint to avoid pair formation.
2098 : // MachineMemOperands hints are set by the AArch64StorePairSuppress pass.
2099 7744 : if (isLdStPairSuppressed(MI))
2100 : return false;
2101 :
2102 : // On some CPUs quad load/store pairs are slower than two single load/stores.
2103 7733 : if (Subtarget.isPaired128Slow()) {
2104 244 : switch (MI.getOpcode()) {
2105 : default:
2106 : break;
2107 98 : case AArch64::LDURQi:
2108 : case AArch64::STURQi:
2109 : case AArch64::LDRQui:
2110 : case AArch64::STRQui:
2111 98 : return false;
2112 : }
2113 : }
2114 :
2115 : return true;
2116 : }
2117 :
2118 15842 : bool AArch64InstrInfo::getMemOpBaseRegImmOfs(
2119 : MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset,
2120 : const TargetRegisterInfo *TRI) const {
2121 : unsigned Width;
2122 15842 : return getMemOpBaseRegImmOfsWidth(LdSt, BaseReg, Offset, Width, TRI);
2123 : }
2124 :
2125 27043 : bool AArch64InstrInfo::getMemOpBaseRegImmOfsWidth(
2126 : MachineInstr &LdSt, unsigned &BaseReg, int64_t &Offset, unsigned &Width,
2127 : const TargetRegisterInfo *TRI) const {
2128 : assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
2129 : // Handle only loads/stores with base register followed by immediate offset.
2130 27043 : if (LdSt.getNumExplicitOperands() == 3) {
2131 : // Non-paired instruction (e.g., ldr x1, [x0, #8]).
2132 44686 : if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isImm())
2133 : return false;
2134 4700 : } else if (LdSt.getNumExplicitOperands() == 4) {
2135 : // Paired instruction (e.g., ldp x1, x2, [x0, #8]).
2136 5276 : if (!LdSt.getOperand(1).isReg() || !LdSt.getOperand(2).isReg() ||
2137 : !LdSt.getOperand(3).isImm())
2138 : return false;
2139 : } else
2140 : return false;
2141 :
2142 : // Get the scaling factor for the instruction and set the width for the
2143 : // instruction.
2144 16800 : unsigned Scale = 0;
2145 : int64_t Dummy1, Dummy2;
2146 :
2147 : // If this returns false, then it's an instruction we don't want to handle.
2148 33600 : if (!getMemOpInfo(LdSt.getOpcode(), Scale, Width, Dummy1, Dummy2))
2149 : return false;
2150 :
2151 : // Compute the offset. Offset is calculated as the immediate operand
2152 : // multiplied by the scaling factor. Unscaled instructions have scaling factor
2153 : // set to 1.
2154 16479 : if (LdSt.getNumExplicitOperands() == 3) {
2155 14819 : BaseReg = LdSt.getOperand(1).getReg();
2156 14819 : Offset = LdSt.getOperand(2).getImm() * Scale;
2157 : } else {
2158 : assert(LdSt.getNumExplicitOperands() == 4 && "invalid number of operands");
2159 1660 : BaseReg = LdSt.getOperand(2).getReg();
2160 1660 : Offset = LdSt.getOperand(3).getImm() * Scale;
2161 : }
2162 : return true;
2163 : }
2164 :
2165 : MachineOperand &
2166 0 : AArch64InstrInfo::getMemOpBaseRegImmOfsOffsetOperand(MachineInstr &LdSt) const {
2167 : assert(LdSt.mayLoadOrStore() && "Expected a memory operation.");
2168 0 : MachineOperand &OfsOp = LdSt.getOperand(LdSt.getNumExplicitOperands() - 1);
2169 : assert(OfsOp.isImm() && "Offset operand wasn't immediate.");
2170 0 : return OfsOp;
2171 : }
2172 :
2173 16897 : bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
2174 : unsigned &Width, int64_t &MinOffset,
2175 : int64_t &MaxOffset) const {
2176 16897 : switch (Opcode) {
2177 : // Not a memory operation or something we want to handle.
2178 321 : default:
2179 321 : Scale = Width = 0;
2180 321 : MinOffset = MaxOffset = 0;
2181 321 : return false;
2182 19 : case AArch64::STRWpost:
2183 : case AArch64::LDRWpost:
2184 19 : Width = 32;
2185 19 : Scale = 4;
2186 19 : MinOffset = -256;
2187 19 : MaxOffset = 255;
2188 19 : break;
2189 208 : case AArch64::LDURQi:
2190 : case AArch64::STURQi:
2191 208 : Width = 16;
2192 208 : Scale = 1;
2193 208 : MinOffset = -256;
2194 208 : MaxOffset = 255;
2195 208 : break;
2196 194 : case AArch64::LDURXi:
2197 : case AArch64::LDURDi:
2198 : case AArch64::STURXi:
2199 : case AArch64::STURDi:
2200 194 : Width = 8;
2201 194 : Scale = 1;
2202 194 : MinOffset = -256;
2203 194 : MaxOffset = 255;
2204 194 : break;
2205 220 : case AArch64::LDURWi:
2206 : case AArch64::LDURSi:
2207 : case AArch64::LDURSWi:
2208 : case AArch64::STURWi:
2209 : case AArch64::STURSi:
2210 220 : Width = 4;
2211 220 : Scale = 1;
2212 220 : MinOffset = -256;
2213 220 : MaxOffset = 255;
2214 220 : break;
2215 106 : case AArch64::LDURHi:
2216 : case AArch64::LDURHHi:
2217 : case AArch64::LDURSHXi:
2218 : case AArch64::LDURSHWi:
2219 : case AArch64::STURHi:
2220 : case AArch64::STURHHi:
2221 106 : Width = 2;
2222 106 : Scale = 1;
2223 106 : MinOffset = -256;
2224 106 : MaxOffset = 255;
2225 106 : break;
2226 84 : case AArch64::LDURBi:
2227 : case AArch64::LDURBBi:
2228 : case AArch64::LDURSBXi:
2229 : case AArch64::LDURSBWi:
2230 : case AArch64::STURBi:
2231 : case AArch64::STURBBi:
2232 84 : Width = 1;
2233 84 : Scale = 1;
2234 84 : MinOffset = -256;
2235 84 : MaxOffset = 255;
2236 84 : break;
2237 917 : case AArch64::LDPQi:
2238 : case AArch64::LDNPQi:
2239 : case AArch64::STPQi:
2240 : case AArch64::STNPQi:
2241 917 : Scale = 16;
2242 917 : Width = 32;
2243 917 : MinOffset = -64;
2244 917 : MaxOffset = 63;
2245 917 : break;
2246 3874 : case AArch64::LDRQui:
2247 : case AArch64::STRQui:
2248 3874 : Scale = Width = 16;
2249 3874 : MinOffset = 0;
2250 3874 : MaxOffset = 4095;
2251 3874 : break;
2252 561 : case AArch64::LDPXi:
2253 : case AArch64::LDPDi:
2254 : case AArch64::LDNPXi:
2255 : case AArch64::LDNPDi:
2256 : case AArch64::STPXi:
2257 : case AArch64::STPDi:
2258 : case AArch64::STNPXi:
2259 : case AArch64::STNPDi:
2260 561 : Scale = 8;
2261 561 : Width = 16;
2262 561 : MinOffset = -64;
2263 561 : MaxOffset = 63;
2264 561 : break;
2265 6931 : case AArch64::LDRXui:
2266 : case AArch64::LDRDui:
2267 : case AArch64::STRXui:
2268 : case AArch64::STRDui:
2269 6931 : Scale = Width = 8;
2270 6931 : MinOffset = 0;
2271 6931 : MaxOffset = 4095;
2272 6931 : break;
2273 170 : case AArch64::LDPWi:
2274 : case AArch64::LDPSi:
2275 : case AArch64::LDNPWi:
2276 : case AArch64::LDNPSi:
2277 : case AArch64::STPWi:
2278 : case AArch64::STPSi:
2279 : case AArch64::STNPWi:
2280 : case AArch64::STNPSi:
2281 170 : Scale = 4;
2282 170 : Width = 8;
2283 170 : MinOffset = -64;
2284 170 : MaxOffset = 63;
2285 170 : break;
2286 2170 : case AArch64::LDRWui:
2287 : case AArch64::LDRSui:
2288 : case AArch64::LDRSWui:
2289 : case AArch64::STRWui:
2290 : case AArch64::STRSui:
2291 2170 : Scale = Width = 4;
2292 2170 : MinOffset = 0;
2293 2170 : MaxOffset = 4095;
2294 2170 : break;
2295 239 : case AArch64::LDRHui:
2296 : case AArch64::LDRHHui:
2297 : case AArch64::STRHui:
2298 : case AArch64::STRHHui:
2299 239 : Scale = Width = 2;
2300 239 : MinOffset = 0;
2301 239 : MaxOffset = 4095;
2302 239 : break;
2303 883 : case AArch64::LDRBui:
2304 : case AArch64::LDRBBui:
2305 : case AArch64::STRBui:
2306 : case AArch64::STRBBui:
2307 883 : Scale = Width = 1;
2308 883 : MinOffset = 0;
2309 883 : MaxOffset = 4095;
2310 883 : break;
2311 : }
2312 :
2313 : return true;
2314 : }
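// Worked example (illustrative, not from the original file): MinOffset and
// MaxOffset are in scaled immediate units, so the reachable byte range is
// [MinOffset * Scale, MaxOffset * Scale]. E.g. LDRXui (Scale = 8, 0..4095)
// reaches byte offsets 0..32760, LDURXi (Scale = 1, -256..255) covers
// -256..255 bytes, and LDPXi (Scale = 8, -64..63) covers -512..504 bytes.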
2315 :
2316 : // Scale the unscaled offsets. Returns false if the unscaled offset can't be
2317 : // scaled.
2318 55 : static bool scaleOffset(unsigned Opc, int64_t &Offset) {
2319 : unsigned OffsetStride = 1;
2320 55 : switch (Opc) {
2321 : default:
2322 : return false;
2323 : case AArch64::LDURQi:
2324 : case AArch64::STURQi:
2325 : OffsetStride = 16;
2326 : break;
2327 12 : case AArch64::LDURXi:
2328 : case AArch64::LDURDi:
2329 : case AArch64::STURXi:
2330 : case AArch64::STURDi:
2331 : OffsetStride = 8;
2332 12 : break;
2333 35 : case AArch64::LDURWi:
2334 : case AArch64::LDURSi:
2335 : case AArch64::LDURSWi:
2336 : case AArch64::STURWi:
2337 : case AArch64::STURSi:
2338 : OffsetStride = 4;
2339 35 : break;
2340 : }
2341 : // If the byte-offset isn't a multiple of the stride, we can't scale this
2342 : // offset.
2343 55 : if (Offset % OffsetStride != 0)
2344 : return false;
2345 :
2346 : // Convert the byte-offset used by unscaled into an "element" offset used
2347 : // by the scaled pair load/store instructions.
2348 48 : Offset /= OffsetStride;
2349 48 : return true;
2350 : }
2351 :
2352 : static bool canPairLdStOpc(unsigned FirstOpc, unsigned SecondOpc) {
2353 556 : if (FirstOpc == SecondOpc)
2354 : return true;
2355 : // We can also pair sign-ext and zero-ext instructions.
2356 74 : switch (FirstOpc) {
2357 : default:
2358 : return false;
2359 3 : case AArch64::LDRWui:
2360 : case AArch64::LDURWi:
2361 3 : return SecondOpc == AArch64::LDRSWui || SecondOpc == AArch64::LDURSWi;
2362 4 : case AArch64::LDRSWui:
2363 : case AArch64::LDURSWi:
2364 4 : return SecondOpc == AArch64::LDRWui || SecondOpc == AArch64::LDURWi;
2365 : }
2366 : // These instructions can't be paired based on their opcodes.
2367 : return false;
2368 : }
2369 :
2370 : /// Detect opportunities for ldp/stp formation.
2371 : ///
2372 : /// Only called for LdSt for which getMemOpBaseRegImmOfs returns true.
2373 1539 : bool AArch64InstrInfo::shouldClusterMemOps(MachineInstr &FirstLdSt,
2374 : unsigned BaseReg1,
2375 : MachineInstr &SecondLdSt,
2376 : unsigned BaseReg2,
2377 : unsigned NumLoads) const {
2378 1539 : if (BaseReg1 != BaseReg2)
2379 : return false;
2380 :
2381 : // Only cluster up to a single pair.
2382 783 : if (NumLoads > 1)
2383 : return false;
2384 :
2385 627 : if (!isPairableLdStInst(FirstLdSt) || !isPairableLdStInst(SecondLdSt))
2386 71 : return false;
2387 :
2388 : // Can we pair these instructions based on their opcodes?
2389 556 : unsigned FirstOpc = FirstLdSt.getOpcode();
2390 556 : unsigned SecondOpc = SecondLdSt.getOpcode();
2391 7 : if (!canPairLdStOpc(FirstOpc, SecondOpc))
2392 : return false;
2393 :
2394 : // Can't merge volatiles or load/stores that have a hint to avoid pair
2395 : // formation, for example.
2396 946 : if (!isCandidateToMergeOrPair(FirstLdSt) ||
2397 458 : !isCandidateToMergeOrPair(SecondLdSt))
2398 32 : return false;
2399 :
2400 : // isCandidateToMergeOrPair guarantees that operand 2 is an immediate.
2401 456 : int64_t Offset1 = FirstLdSt.getOperand(2).getImm();
2402 456 : if (isUnscaledLdSt(FirstOpc) && !scaleOffset(FirstOpc, Offset1))
2403 : return false;
2404 :
2405 449 : int64_t Offset2 = SecondLdSt.getOperand(2).getImm();
2406 449 : if (isUnscaledLdSt(SecondOpc) && !scaleOffset(SecondOpc, Offset2))
2407 : return false;
2408 :
2409 : // Pairwise instructions have a 7-bit signed offset field.
2410 449 : if (Offset1 > 63 || Offset1 < -64)
2411 : return false;
2412 :
2413 : // The caller should already have ordered First/SecondLdSt by offset.
2414 : assert(Offset1 <= Offset2 && "Caller should have ordered offsets.");
2415 435 : return Offset1 + 1 == Offset2;
2416 : }
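// Illustrative sketch (not from the original file): two loads such as
//   ldr x1, [x0, #8]    ; scaled imm 1
//   ldr x2, [x0, #16]   ; scaled imm 2
// share a base, use pairable opcodes, and have consecutive element offsets
// (1 + 1 == 2), so they are clustered here and can later be rewritten by the
// load/store optimizer into "ldp x1, x2, [x0, #8]".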
2417 :
2418 87 : static const MachineInstrBuilder &AddSubReg(const MachineInstrBuilder &MIB,
2419 : unsigned Reg, unsigned SubIdx,
2420 : unsigned State,
2421 : const TargetRegisterInfo *TRI) {
2422 87 : if (!SubIdx)
2423 0 : return MIB.addReg(Reg, State);
2424 :
2425 87 : if (TargetRegisterInfo::isPhysicalRegister(Reg))
2426 87 : return MIB.addReg(TRI->getSubReg(Reg, SubIdx), State);
2427 0 : return MIB.addReg(Reg, State, SubIdx);
2428 : }
2429 :
2430 : static bool forwardCopyWillClobberTuple(unsigned DestReg, unsigned SrcReg,
2431 : unsigned NumRegs) {
2432 : // We really want the positive remainder mod 32 here, that happens to be
2433 : // easily obtainable with a mask.
2434 11 : return ((DestReg - SrcReg) & 0x1f) < NumRegs;
2435 : }
2436 :
2437 11 : void AArch64InstrInfo::copyPhysRegTuple(MachineBasicBlock &MBB,
2438 : MachineBasicBlock::iterator I,
2439 : const DebugLoc &DL, unsigned DestReg,
2440 : unsigned SrcReg, bool KillSrc,
2441 : unsigned Opcode,
2442 : ArrayRef<unsigned> Indices) const {
2443 : assert(Subtarget.hasNEON() && "Unexpected register copy without NEON");
2444 : const TargetRegisterInfo *TRI = &getRegisterInfo();
2445 11 : uint16_t DestEncoding = TRI->getEncodingValue(DestReg);
2446 : uint16_t SrcEncoding = TRI->getEncodingValue(SrcReg);
2447 11 : unsigned NumRegs = Indices.size();
2448 :
2449 11 : int SubReg = 0, End = NumRegs, Incr = 1;
2450 11 : if (forwardCopyWillClobberTuple(DestEncoding, SrcEncoding, NumRegs)) {
2451 4 : SubReg = NumRegs - 1;
2452 : End = -1;
2453 : Incr = -1;
2454 : }
2455 :
2456 40 : for (; SubReg != End; SubReg += Incr) {
2457 58 : const MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(Opcode));
2458 58 : AddSubReg(MIB, DestReg, Indices[SubReg], RegState::Define, TRI);
2459 29 : AddSubReg(MIB, SrcReg, Indices[SubReg], 0, TRI);
2460 29 : AddSubReg(MIB, SrcReg, Indices[SubReg], getKillRegState(KillSrc), TRI);
2461 : }
2462 11 : }
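// Worked example (illustrative, not from the original file): when copying the
// register triple Q1_Q2_Q3 into Q2_Q3_Q4, the encodings differ by 1 and
// (1 & 0x1f) < 3, so a forward copy would overwrite Q2/Q3 before reading them;
// the loop above therefore walks the sub-registers in reverse (Q4 <- Q3,
// Q3 <- Q2, Q2 <- Q1), emitting each step as an ORRv16i8 of the source with
// itself.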
2463 :
2464 3599 : void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB,
2465 : MachineBasicBlock::iterator I,
2466 : const DebugLoc &DL, unsigned DestReg,
2467 : unsigned SrcReg, bool KillSrc) const {
2468 4808 : if (AArch64::GPR32spRegClass.contains(DestReg) &&
2469 700 : (AArch64::GPR32spRegClass.contains(SrcReg) || SrcReg == AArch64::WZR)) {
2470 : const TargetRegisterInfo *TRI = &getRegisterInfo();
2471 :
2472 943 : if (DestReg == AArch64::WSP || SrcReg == AArch64::WSP) {
2473 : // If either operand is WSP, expand to ADD #0.
2474 0 : if (Subtarget.hasZeroCycleRegMove()) {
2475 : // Cyclone recognizes "ADD Xd, Xn, #0" as a zero-cycle register move.
2476 0 : unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
2477 : &AArch64::GPR64spRegClass);
2478 0 : unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
2479 : &AArch64::GPR64spRegClass);
2480 : // This instruction is reading and writing X registers. This may upset
2481 : // the register scavenger and machine verifier, so we need to indicate
2482 : // that we are reading an undefined value from SrcRegX, but a proper
2483 : // value from SrcReg.
2484 0 : BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestRegX)
2485 0 : .addReg(SrcRegX, RegState::Undef)
2486 : .addImm(0)
2487 : .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
2488 0 : .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2489 : } else {
2490 0 : BuildMI(MBB, I, DL, get(AArch64::ADDWri), DestReg)
2491 0 : .addReg(SrcReg, getKillRegState(KillSrc))
2492 : .addImm(0)
2493 : .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2494 : }
2495 943 : } else if (SrcReg == AArch64::WZR && Subtarget.hasZeroCycleZeroingGP()) {
2496 144 : BuildMI(MBB, I, DL, get(AArch64::MOVZWi), DestReg)
2497 : .addImm(0)
2498 : .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2499 : } else {
2500 871 : if (Subtarget.hasZeroCycleRegMove()) {
2501 : // Cyclone recognizes "ORR Xd, XZR, Xm" as a zero-cycle register move.
2502 82 : unsigned DestRegX = TRI->getMatchingSuperReg(DestReg, AArch64::sub_32,
2503 : &AArch64::GPR64spRegClass);
2504 82 : unsigned SrcRegX = TRI->getMatchingSuperReg(SrcReg, AArch64::sub_32,
2505 : &AArch64::GPR64spRegClass);
2506 : // This instruction is reading and writing X registers. This may upset
2507 : // the register scavenger and machine verifier, so we need to indicate
2508 : // that we are reading an undefined value from SrcRegX, but a proper
2509 : // value from SrcReg.
2510 246 : BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestRegX)
2511 82 : .addReg(AArch64::XZR)
2512 82 : .addReg(SrcRegX, RegState::Undef)
2513 82 : .addReg(SrcReg, RegState::Implicit | getKillRegState(KillSrc));
2514 : } else {
2515 : // Otherwise, expand to ORR WZR.
2516 2367 : BuildMI(MBB, I, DL, get(AArch64::ORRWrr), DestReg)
2517 789 : .addReg(AArch64::WZR)
2518 789 : .addReg(SrcReg, getKillRegState(KillSrc));
2519 : }
2520 : }
2521 943 : return;
2522 : }
2523 :
2524 3689 : if (AArch64::GPR64spRegClass.contains(DestReg) &&
2525 260 : (AArch64::GPR64spRegClass.contains(SrcReg) || SrcReg == AArch64::XZR)) {
2526 908 : if (DestReg == AArch64::SP || SrcReg == AArch64::SP) {
2527 : // If either operand is SP, expand to ADD #0.
2528 345 : BuildMI(MBB, I, DL, get(AArch64::ADDXri), DestReg)
2529 115 : .addReg(SrcReg, getKillRegState(KillSrc))
2530 : .addImm(0)
2531 : .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2532 793 : } else if (SrcReg == AArch64::XZR && Subtarget.hasZeroCycleZeroingGP()) {
2533 60 : BuildMI(MBB, I, DL, get(AArch64::MOVZXi), DestReg)
2534 : .addImm(0)
2535 : .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
2536 : } else {
2537 : // Otherwise, expand to ORR XZR.
2538 2289 : BuildMI(MBB, I, DL, get(AArch64::ORRXrr), DestReg)
2539 763 : .addReg(AArch64::XZR)
2540 763 : .addReg(SrcReg, getKillRegState(KillSrc));
2541 : }
2542 908 : return;
2543 : }
2544 :
2545 : // Copy a DDDD register quad by copying the individual sub-registers.
2546 1748 : if (AArch64::DDDDRegClass.contains(DestReg) &&
2547 : AArch64::DDDDRegClass.contains(SrcReg)) {
2548 : static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2549 : AArch64::dsub2, AArch64::dsub3};
2550 0 : copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2551 : Indices);
2552 0 : return;
2553 : }
2554 :
2555 : // Copy a DDD register triple by copying the individual sub-registers.
2556 1749 : if (AArch64::DDDRegClass.contains(DestReg) &&
2557 : AArch64::DDDRegClass.contains(SrcReg)) {
2558 : static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1,
2559 : AArch64::dsub2};
2560 1 : copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2561 : Indices);
2562 1 : return;
2563 : }
2564 :
2565 : // Copy a DD register pair by copying the individual sub-registers.
2566 1751 : if (AArch64::DDRegClass.contains(DestReg) &&
2567 : AArch64::DDRegClass.contains(SrcReg)) {
2568 : static const unsigned Indices[] = {AArch64::dsub0, AArch64::dsub1};
2569 4 : copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv8i8,
2570 : Indices);
2571 4 : return;
2572 : }
2573 :
2574 : // Copy a QQQQ register quad by copying the individual sub-registers.
2575 1745 : if (AArch64::QQQQRegClass.contains(DestReg) &&
2576 : AArch64::QQQQRegClass.contains(SrcReg)) {
2577 : static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2578 : AArch64::qsub2, AArch64::qsub3};
2579 2 : copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2580 : Indices);
2581 2 : return;
2582 : }
2583 :
2584 : // Copy a QQQ register triple by copying the individual sub-registers.
2585 1743 : if (AArch64::QQQRegClass.contains(DestReg) &&
2586 : AArch64::QQQRegClass.contains(SrcReg)) {
2587 : static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1,
2588 : AArch64::qsub2};
2589 2 : copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2590 : Indices);
2591 2 : return;
2592 : }
2593 :
2594 : // Copy a QQ register pair by copying the individual sub-registers.
2595 1741 : if (AArch64::QQRegClass.contains(DestReg) &&
2596 : AArch64::QQRegClass.contains(SrcReg)) {
2597 : static const unsigned Indices[] = {AArch64::qsub0, AArch64::qsub1};
2598 2 : copyPhysRegTuple(MBB, I, DL, DestReg, SrcReg, KillSrc, AArch64::ORRv16i8,
2599 : Indices);
2600 2 : return;
2601 : }
2602 :
2603 2066 : if (AArch64::FPR128RegClass.contains(DestReg) &&
2604 : AArch64::FPR128RegClass.contains(SrcReg)) {
2605 329 : if (Subtarget.hasNEON()) {
2606 984 : BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2607 328 : .addReg(SrcReg)
2608 328 : .addReg(SrcReg, getKillRegState(KillSrc));
2609 : } else {
2610 2 : BuildMI(MBB, I, DL, get(AArch64::STRQpre))
2611 1 : .addReg(AArch64::SP, RegState::Define)
2612 1 : .addReg(SrcReg, getKillRegState(KillSrc))
2613 1 : .addReg(AArch64::SP)
2614 : .addImm(-16);
2615 3 : BuildMI(MBB, I, DL, get(AArch64::LDRQpre))
2616 1 : .addReg(AArch64::SP, RegState::Define)
2617 1 : .addReg(DestReg, RegState::Define)
2618 1 : .addReg(AArch64::SP)
2619 : .addImm(16);
2620 : }
2621 329 : return;
2622 : }
2623 :
2624 1764 : if (AArch64::FPR64RegClass.contains(DestReg) &&
2625 : AArch64::FPR64RegClass.contains(SrcReg)) {
2626 352 : if (Subtarget.hasNEON()) {
2627 : DestReg = RI.getMatchingSuperReg(DestReg, AArch64::dsub,
2628 : &AArch64::FPR128RegClass);
2629 349 : SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::dsub,
2630 : &AArch64::FPR128RegClass);
2631 1047 : BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2632 349 : .addReg(SrcReg)
2633 349 : .addReg(SrcReg, getKillRegState(KillSrc));
2634 : } else {
2635 9 : BuildMI(MBB, I, DL, get(AArch64::FMOVDr), DestReg)
2636 3 : .addReg(SrcReg, getKillRegState(KillSrc));
2637 : }
2638 352 : return;
2639 : }
2640 :
2641 1433 : if (AArch64::FPR32RegClass.contains(DestReg) &&
2642 : AArch64::FPR32RegClass.contains(SrcReg)) {
2643 79 : if (Subtarget.hasNEON()) {
2644 : DestReg = RI.getMatchingSuperReg(DestReg, AArch64::ssub,
2645 : &AArch64::FPR128RegClass);
2646 78 : SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::ssub,
2647 : &AArch64::FPR128RegClass);
2648 234 : BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2649 78 : .addReg(SrcReg)
2650 78 : .addReg(SrcReg, getKillRegState(KillSrc));
2651 : } else {
2652 3 : BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2653 1 : .addReg(SrcReg, getKillRegState(KillSrc));
2654 : }
2655 79 : return;
2656 : }
2657 :
2658 1017 : if (AArch64::FPR16RegClass.contains(DestReg) &&
2659 : AArch64::FPR16RegClass.contains(SrcReg)) {
2660 40 : if (Subtarget.hasNEON()) {
2661 : DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2662 : &AArch64::FPR128RegClass);
2663 40 : SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2664 : &AArch64::FPR128RegClass);
2665 120 : BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2666 40 : .addReg(SrcReg)
2667 40 : .addReg(SrcReg, getKillRegState(KillSrc));
2668 : } else {
2669 : DestReg = RI.getMatchingSuperReg(DestReg, AArch64::hsub,
2670 : &AArch64::FPR32RegClass);
2671 0 : SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::hsub,
2672 : &AArch64::FPR32RegClass);
2673 0 : BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2674 0 : .addReg(SrcReg, getKillRegState(KillSrc));
2675 : }
2676 40 : return;
2677 : }
2678 :
2679 937 : if (AArch64::FPR8RegClass.contains(DestReg) &&
2680 : AArch64::FPR8RegClass.contains(SrcReg)) {
2681 0 : if (Subtarget.hasNEON()) {
2682 : DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2683 : &AArch64::FPR128RegClass);
2684 0 : SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2685 : &AArch64::FPR128RegClass);
2686 0 : BuildMI(MBB, I, DL, get(AArch64::ORRv16i8), DestReg)
2687 0 : .addReg(SrcReg)
2688 0 : .addReg(SrcReg, getKillRegState(KillSrc));
2689 : } else {
2690 : DestReg = RI.getMatchingSuperReg(DestReg, AArch64::bsub,
2691 : &AArch64::FPR32RegClass);
2692 0 : SrcReg = RI.getMatchingSuperReg(SrcReg, AArch64::bsub,
2693 : &AArch64::FPR32RegClass);
2694 0 : BuildMI(MBB, I, DL, get(AArch64::FMOVSr), DestReg)
2695 0 : .addReg(SrcReg, getKillRegState(KillSrc));
2696 : }
2697 0 : return;
2698 : }
2699 :
2700 : // Copies between GPR64 and FPR64.
2701 265 : if (AArch64::FPR64RegClass.contains(DestReg) &&
2702 132 : AArch64::GPR64RegClass.contains(SrcReg)) {
2703 264 : BuildMI(MBB, I, DL, get(AArch64::FMOVXDr), DestReg)
2704 132 : .addReg(SrcReg, getKillRegState(KillSrc));
2705 132 : return;
2706 : }
2707 930 : if (AArch64::GPR64RegClass.contains(DestReg) &&
2708 : AArch64::FPR64RegClass.contains(SrcReg)) {
2709 248 : BuildMI(MBB, I, DL, get(AArch64::FMOVDXr), DestReg)
2710 124 : .addReg(SrcReg, getKillRegState(KillSrc));
2711 124 : return;
2712 : }
2713 : // Copies between GPR32 and FPR32.
2714 1041 : if (AArch64::FPR32RegClass.contains(DestReg) &&
2715 413 : AArch64::GPR32RegClass.contains(SrcReg)) {
2716 826 : BuildMI(MBB, I, DL, get(AArch64::FMOVWSr), DestReg)
2717 413 : .addReg(SrcReg, getKillRegState(KillSrc));
2718 413 : return;
2719 : }
2720 534 : if (AArch64::GPR32RegClass.contains(DestReg) &&
2721 : AArch64::FPR32RegClass.contains(SrcReg)) {
2722 532 : BuildMI(MBB, I, DL, get(AArch64::FMOVSWr), DestReg)
2723 266 : .addReg(SrcReg, getKillRegState(KillSrc));
2724 266 : return;
2725 : }
2726 :
2727 2 : if (DestReg == AArch64::NZCV) {
2728 : assert(AArch64::GPR64RegClass.contains(SrcReg) && "Invalid NZCV copy");
2729 2 : BuildMI(MBB, I, DL, get(AArch64::MSR))
2730 : .addImm(AArch64SysReg::NZCV)
2731 1 : .addReg(SrcReg, getKillRegState(KillSrc))
2732 1 : .addReg(AArch64::NZCV, RegState::Implicit | RegState::Define);
2733 1 : return;
2734 : }
2735 :
2736 1 : if (SrcReg == AArch64::NZCV) {
2737 : assert(AArch64::GPR64RegClass.contains(DestReg) && "Invalid NZCV copy");
2738 2 : BuildMI(MBB, I, DL, get(AArch64::MRS), DestReg)
2739 : .addImm(AArch64SysReg::NZCV)
2740 1 : .addReg(AArch64::NZCV, RegState::Implicit | getKillRegState(KillSrc));
2741 1 : return;
2742 : }
2743 :
2744 0 : llvm_unreachable("unimplemented reg-to-reg copy");
2745 : }
2746 :
2747 2 : static void storeRegPairToStackSlot(const TargetRegisterInfo &TRI,
2748 : MachineBasicBlock &MBB,
2749 : MachineBasicBlock::iterator InsertBefore,
2750 : const MCInstrDesc &MCID,
2751 : unsigned SrcReg, bool IsKill,
2752 : unsigned SubIdx0, unsigned SubIdx1, int FI,
2753 : MachineMemOperand *MMO) {
2754 : unsigned SrcReg0 = SrcReg;
2755 : unsigned SrcReg1 = SrcReg;
2756 2 : if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
2757 0 : SrcReg0 = TRI.getSubReg(SrcReg, SubIdx0);
2758 : SubIdx0 = 0;
2759 0 : SrcReg1 = TRI.getSubReg(SrcReg, SubIdx1);
2760 : SubIdx1 = 0;
2761 : }
2762 4 : BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
2763 2 : .addReg(SrcReg0, getKillRegState(IsKill), SubIdx0)
2764 2 : .addReg(SrcReg1, getKillRegState(IsKill), SubIdx1)
2765 : .addFrameIndex(FI)
2766 : .addImm(0)
2767 : .addMemOperand(MMO);
2768 2 : }
2769 :
2770 1234 : void AArch64InstrInfo::storeRegToStackSlot(
2771 : MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned SrcReg,
2772 : bool isKill, int FI, const TargetRegisterClass *RC,
2773 : const TargetRegisterInfo *TRI) const {
2774 1234 : MachineFunction &MF = *MBB.getParent();
2775 1234 : MachineFrameInfo &MFI = MF.getFrameInfo();
2776 : unsigned Align = MFI.getObjectAlignment(FI);
2777 :
2778 1234 : MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
2779 1234 : MachineMemOperand *MMO = MF.getMachineMemOperand(
2780 : PtrInfo, MachineMemOperand::MOStore, MFI.getObjectSize(FI), Align);
2781 : unsigned Opc = 0;
2782 : bool Offset = true;
2783 1234 : switch (TRI->getSpillSize(*RC)) {
2784 0 : case 1:
2785 0 : if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2786 : Opc = AArch64::STRBui;
2787 : break;
2788 0 : case 2:
2789 0 : if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2790 : Opc = AArch64::STRHui;
2791 : break;
2792 141 : case 4:
2793 282 : if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2794 : Opc = AArch64::STRWui;
2795 128 : if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2796 33 : MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR32RegClass);
2797 : else
2798 : assert(SrcReg != AArch64::WSP);
2799 26 : } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2800 : Opc = AArch64::STRSui;
2801 : break;
2802 799 : case 8:
2803 1598 : if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2804 : Opc = AArch64::STRXui;
2805 644 : if (TargetRegisterInfo::isVirtualRegister(SrcReg))
2806 309 : MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
2807 : else
2808 : assert(SrcReg != AArch64::SP);
2809 310 : } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
2810 : Opc = AArch64::STRDui;
2811 2 : } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
2812 2 : storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
2813 : get(AArch64::STPWi), SrcReg, isKill,
2814 : AArch64::sube32, AArch64::subo32, FI, MMO);
2815 2 : return;
2816 : }
2817 : break;
2818 287 : case 16:
2819 574 : if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2820 : Opc = AArch64::STRQui;
2821 2 : else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
2822 : assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2823 : Opc = AArch64::ST1Twov1d;
2824 : Offset = false;
2825 2 : } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
2826 2 : storeRegPairToStackSlot(getRegisterInfo(), MBB, MBBI,
2827 : get(AArch64::STPXi), SrcReg, isKill,
2828 : AArch64::sube64, AArch64::subo64, FI, MMO);
2829 1 : return;
2830 : }
2831 : break;
2832 0 : case 24:
2833 0 : if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
2834 : assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2835 : Opc = AArch64::ST1Threev1d;
2836 : Offset = false;
2837 : }
2838 : break;
2839 3 : case 32:
2840 6 : if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
2841 : assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2842 : Opc = AArch64::ST1Fourv1d;
2843 : Offset = false;
2844 6 : } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
2845 : assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2846 : Opc = AArch64::ST1Twov2d;
2847 : Offset = false;
2848 : }
2849 : break;
2850 2 : case 48:
2851 4 : if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
2852 : assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2853 : Opc = AArch64::ST1Threev2d;
2854 : Offset = false;
2855 : }
2856 : break;
2857 2 : case 64:
2858 4 : if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
2859 : assert(Subtarget.hasNEON() && "Unexpected register store without NEON");
2860 : Opc = AArch64::ST1Fourv2d;
2861 : Offset = false;
2862 : }
2863 : break;
2864 : }
2865 : assert(Opc && "Unknown register class");
2866 :
2867 2464 : const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
2868 1232 : .addReg(SrcReg, getKillRegState(isKill))
2869 1232 : .addFrameIndex(FI);
2870 :
2871 1232 : if (Offset)
2872 : MI.addImm(0);
2873 : MI.addMemOperand(MMO);
2874 : }
2875 :
2876 2 : static void loadRegPairFromStackSlot(const TargetRegisterInfo &TRI,
2877 : MachineBasicBlock &MBB,
2878 : MachineBasicBlock::iterator InsertBefore,
2879 : const MCInstrDesc &MCID,
2880 : unsigned DestReg, unsigned SubIdx0,
2881 : unsigned SubIdx1, int FI,
2882 : MachineMemOperand *MMO) {
2883 : unsigned DestReg0 = DestReg;
2884 : unsigned DestReg1 = DestReg;
2885 : bool IsUndef = true;
2886 2 : if (TargetRegisterInfo::isPhysicalRegister(DestReg)) {
2887 0 : DestReg0 = TRI.getSubReg(DestReg, SubIdx0);
2888 : SubIdx0 = 0;
2889 0 : DestReg1 = TRI.getSubReg(DestReg, SubIdx1);
2890 : SubIdx1 = 0;
2891 : IsUndef = false;
2892 : }
2893 4 : BuildMI(MBB, InsertBefore, DebugLoc(), MCID)
2894 2 : .addReg(DestReg0, RegState::Define | getUndefRegState(IsUndef), SubIdx0)
2895 2 : .addReg(DestReg1, RegState::Define | getUndefRegState(IsUndef), SubIdx1)
2896 : .addFrameIndex(FI)
2897 : .addImm(0)
2898 : .addMemOperand(MMO);
2899 2 : }
2900 :
2901 1177 : void AArch64InstrInfo::loadRegFromStackSlot(
2902 : MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned DestReg,
2903 : int FI, const TargetRegisterClass *RC,
2904 : const TargetRegisterInfo *TRI) const {
2905 1177 : MachineFunction &MF = *MBB.getParent();
2906 1177 : MachineFrameInfo &MFI = MF.getFrameInfo();
2907 : unsigned Align = MFI.getObjectAlignment(FI);
2908 1177 : MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
2909 1177 : MachineMemOperand *MMO = MF.getMachineMemOperand(
2910 : PtrInfo, MachineMemOperand::MOLoad, MFI.getObjectSize(FI), Align);
2911 :
2912 : unsigned Opc = 0;
2913 : bool Offset = true;
2914 1177 : switch (TRI->getSpillSize(*RC)) {
2915 0 : case 1:
2916 0 : if (AArch64::FPR8RegClass.hasSubClassEq(RC))
2917 : Opc = AArch64::LDRBui;
2918 : break;
2919 0 : case 2:
2920 0 : if (AArch64::FPR16RegClass.hasSubClassEq(RC))
2921 : Opc = AArch64::LDRHui;
2922 : break;
2923 78 : case 4:
2924 156 : if (AArch64::GPR32allRegClass.hasSubClassEq(RC)) {
2925 : Opc = AArch64::LDRWui;
2926 69 : if (TargetRegisterInfo::isVirtualRegister(DestReg))
2927 34 : MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR32RegClass);
2928 : else
2929 : assert(DestReg != AArch64::WSP);
2930 18 : } else if (AArch64::FPR32RegClass.hasSubClassEq(RC))
2931 : Opc = AArch64::LDRSui;
2932 : break;
2933 682 : case 8:
2934 1364 : if (AArch64::GPR64allRegClass.hasSubClassEq(RC)) {
2935 : Opc = AArch64::LDRXui;
2936 578 : if (TargetRegisterInfo::isVirtualRegister(DestReg))
2937 299 : MF.getRegInfo().constrainRegClass(DestReg, &AArch64::GPR64RegClass);
2938 : else
2939 : assert(DestReg != AArch64::SP);
2940 208 : } else if (AArch64::FPR64RegClass.hasSubClassEq(RC)) {
2941 : Opc = AArch64::LDRDui;
2942 2 : } else if (AArch64::WSeqPairsClassRegClass.hasSubClassEq(RC)) {
2943 2 : loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
2944 : get(AArch64::LDPWi), DestReg, AArch64::sube32,
2945 : AArch64::subo32, FI, MMO);
2946 2 : return;
2947 : }
2948 : break;
2949 410 : case 16:
2950 820 : if (AArch64::FPR128RegClass.hasSubClassEq(RC))
2951 : Opc = AArch64::LDRQui;
2952 2 : else if (AArch64::DDRegClass.hasSubClassEq(RC)) {
2953 : assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2954 : Opc = AArch64::LD1Twov1d;
2955 : Offset = false;
2956 2 : } else if (AArch64::XSeqPairsClassRegClass.hasSubClassEq(RC)) {
2957 2 : loadRegPairFromStackSlot(getRegisterInfo(), MBB, MBBI,
2958 : get(AArch64::LDPXi), DestReg, AArch64::sube64,
2959 : AArch64::subo64, FI, MMO);
2960 1 : return;
2961 : }
2962 : break;
2963 0 : case 24:
2964 0 : if (AArch64::DDDRegClass.hasSubClassEq(RC)) {
2965 : assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2966 : Opc = AArch64::LD1Threev1d;
2967 : Offset = false;
2968 : }
2969 : break;
2970 3 : case 32:
2971 6 : if (AArch64::DDDDRegClass.hasSubClassEq(RC)) {
2972 : assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2973 : Opc = AArch64::LD1Fourv1d;
2974 : Offset = false;
2975 6 : } else if (AArch64::QQRegClass.hasSubClassEq(RC)) {
2976 : assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2977 : Opc = AArch64::LD1Twov2d;
2978 : Offset = false;
2979 : }
2980 : break;
2981 2 : case 48:
2982 4 : if (AArch64::QQQRegClass.hasSubClassEq(RC)) {
2983 : assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2984 : Opc = AArch64::LD1Threev2d;
2985 : Offset = false;
2986 : }
2987 : break;
2988 2 : case 64:
2989 4 : if (AArch64::QQQQRegClass.hasSubClassEq(RC)) {
2990 : assert(Subtarget.hasNEON() && "Unexpected register load without NEON");
2991 : Opc = AArch64::LD1Fourv2d;
2992 : Offset = false;
2993 : }
2994 : break;
2995 : }
2996 : assert(Opc && "Unknown register class");
2997 :
2998 2350 : const MachineInstrBuilder MI = BuildMI(MBB, MBBI, DebugLoc(), get(Opc))
2999 1175 : .addReg(DestReg, getDefRegState(true))
3000 1175 : .addFrameIndex(FI);
3001 1175 : if (Offset)
3002 : MI.addImm(0);
3003 : MI.addMemOperand(MMO);
3004 : }
3005 :
3006 16311 : void llvm::emitFrameOffset(MachineBasicBlock &MBB,
3007 : MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
3008 : unsigned DestReg, unsigned SrcReg, int Offset,
3009 : const TargetInstrInfo *TII,
3010 : MachineInstr::MIFlag Flag, bool SetNZCV) {
3011 16311 : if (DestReg == SrcReg && Offset == 0)
3012 : return;
3013 :
3014 : assert((DestReg != AArch64::SP || Offset % 16 == 0) &&
3015 : "SP increment/decrement not 16-byte aligned");
3016 :
3017 : bool isSub = Offset < 0;
3018 2156 : if (isSub)
3019 853 : Offset = -Offset;
3020 :
3021 : // FIXME: If the offset won't fit in 24-bits, compute the offset into a
3022 : // scratch register. If DestReg is a virtual register, use it as the
3023 : // scratch register; otherwise, create a new virtual register (to be
3024 : // replaced by the scavenger at the end of PEI). That case can be optimized
3025 : // slightly if DestReg is SP which is always 16-byte aligned, so the scratch
3026 : // register can be loaded with offset%8 and the add/sub can use an extending
3027 : // instruction with LSL#3.
3028 : // Currently the function handles any offset but may generate a poor code
3029 : // sequence.
3030 : // assert(Offset < (1 << 24) && "unimplemented reg plus immediate");
3031 :
3032 : unsigned Opc;
3033 2156 : if (SetNZCV)
3034 3 : Opc = isSub ? AArch64::SUBSXri : AArch64::ADDSXri;
3035 : else
3036 2153 : Opc = isSub ? AArch64::SUBXri : AArch64::ADDXri;
3037 : const unsigned MaxEncoding = 0xfff;
3038 : const unsigned ShiftSize = 12;
3039 : const unsigned MaxEncodableValue = MaxEncoding << ShiftSize;
3040 2189 : while (((unsigned)Offset) >= (1 << ShiftSize)) {
3041 : unsigned ThisVal;
3042 41 : if (((unsigned)Offset) > MaxEncodableValue) {
3043 : ThisVal = MaxEncodableValue;
3044 : } else {
3045 31 : ThisVal = Offset & MaxEncodableValue;
3046 : }
3047 : assert((ThisVal >> ShiftSize) <= MaxEncoding &&
3048 : "Encoding cannot handle value that big");
3049 82 : BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
3050 41 : .addReg(SrcReg)
3051 41 : .addImm(ThisVal >> ShiftSize)
3052 : .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, ShiftSize))
3053 : .setMIFlag(Flag);
3054 :
3055 : SrcReg = DestReg;
3056 41 : Offset -= ThisVal;
3057 41 : if (Offset == 0)
3058 : return;
3059 : }
3060 6444 : BuildMI(MBB, MBBI, DL, TII->get(Opc), DestReg)
3061 2148 : .addReg(SrcReg)
3062 2148 : .addImm(Offset)
3063 : .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0))
3064 : .setMIFlag(Flag);
3065 : }
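// Worked example (illustrative, not from the original file): for an offset of
// 0x12345 the loop above first emits
//   add  xD, xS, #0x12, lsl #12     ; consumes 0x12000
// leaving 0x345, which fits in 12 bits, so the final instruction is
//   add  xD, xD, #0x345
// Negative offsets use SUBXri the same way, and SetNZCV selects the
// flag-setting ADDS/SUBS forms instead.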
3066 :
3067 1265 : MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
3068 : MachineFunction &MF, MachineInstr &MI, ArrayRef<unsigned> Ops,
3069 : MachineBasicBlock::iterator InsertPt, int FrameIndex,
3070 : LiveIntervals *LIS) const {
3071 : // This is a bit of a hack. Consider this instruction:
3072 : //
3073 : // %0 = COPY %sp; GPR64all:%0
3074 : //
3075 : // We explicitly chose GPR64all for the virtual register so such a copy might
3076 : // be eliminated by RegisterCoalescer. However, that may not be possible, and
3077 : // %0 may even spill. We can't spill %sp, and since it is in the GPR64all
3078 : // register class, TargetInstrInfo::foldMemoryOperand() is going to try.
3079 : //
3080 : // To prevent that, we are going to constrain the %0 register class here.
3081 : //
3082 : // <rdar://problem/11522048>
3083 : //
3084 : if (MI.isFullCopy()) {
3085 420 : unsigned DstReg = MI.getOperand(0).getReg();
3086 420 : unsigned SrcReg = MI.getOperand(1).getReg();
3087 420 : if (SrcReg == AArch64::SP &&
3088 : TargetRegisterInfo::isVirtualRegister(DstReg)) {
3089 1 : MF.getRegInfo().constrainRegClass(DstReg, &AArch64::GPR64RegClass);
3090 1 : return nullptr;
3091 : }
3092 419 : if (DstReg == AArch64::SP &&
3093 : TargetRegisterInfo::isVirtualRegister(SrcReg)) {
3094 1 : MF.getRegInfo().constrainRegClass(SrcReg, &AArch64::GPR64RegClass);
3095 1 : return nullptr;
3096 : }
3097 : }
3098 :
3099 : // Handle the case where a copy is being spilled or filled but the source
3100 : // and destination register class don't match. For example:
3101 : //
3102 : // %0 = COPY %xzr; GPR64common:%0
3103 : //
3104 : // In this case we can still safely fold away the COPY and generate the
3105 : // following spill code:
3106 : //
3107 : // STRXui %xzr, %stack.0
3108 : //
3109 : // This also eliminates spilled cross register class COPYs (e.g. between x and
3110 : // d regs) of the same size. For example:
3111 : //
3112 : // %0 = COPY %1; GPR64:%0, FPR64:%1
3113 : //
3114 : // will be filled as
3115 : //
3116 : // LDRDui %0, fi<#0>
3117 : //
3118 : // instead of
3119 : //
3120 : // LDRXui %Temp, fi<#0>
3121 : // %0 = FMOV %Temp
3122 : //
3123 1263 : if (MI.isCopy() && Ops.size() == 1 &&
3124 : // Make sure we're only folding the explicit COPY defs/uses.
3125 590 : (Ops[0] == 0 || Ops[0] == 1)) {
3126 : bool IsSpill = Ops[0] == 0;
3127 : bool IsFill = !IsSpill;
3128 590 : const TargetRegisterInfo &TRI = *MF.getSubtarget().getRegisterInfo();
3129 590 : const MachineRegisterInfo &MRI = MF.getRegInfo();
3130 590 : MachineBasicBlock &MBB = *MI.getParent();
3131 590 : const MachineOperand &DstMO = MI.getOperand(0);
3132 : const MachineOperand &SrcMO = MI.getOperand(1);
3133 590 : unsigned DstReg = DstMO.getReg();
3134 590 : unsigned SrcReg = SrcMO.getReg();
3135 : // This is slightly expensive to compute for physical regs since
3136 : // getMinimalPhysRegClass is slow.
3137 : auto getRegClass = [&](unsigned Reg) {
3138 : return TargetRegisterInfo::isVirtualRegister(Reg)
3139 : ? MRI.getRegClass(Reg)
3140 : : TRI.getMinimalPhysRegClass(Reg);
3141 590 : };
3142 :
3143 590 : if (DstMO.getSubReg() == 0 && SrcMO.getSubReg() == 0) {
3144 : assert(TRI.getRegSizeInBits(*getRegClass(DstReg)) ==
3145 : TRI.getRegSizeInBits(*getRegClass(SrcReg)) &&
3146 : "Mismatched register size in non subreg COPY");
3147 418 : if (IsSpill)
3148 183 : storeRegToStackSlot(MBB, InsertPt, SrcReg, SrcMO.isKill(), FrameIndex,
3149 : getRegClass(SrcReg), &TRI);
3150 : else
3151 235 : loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex,
3152 : getRegClass(DstReg), &TRI);
3153 501 : return &*--InsertPt;
3154 : }
3155 :
3156 : // Handle cases like spilling def of:
3157 : //
3158 : // %0:sub_32<def,read-undef> = COPY %wzr; GPR64common:%0
3159 : //
3160 : // where the physical register source can be widened and stored to the full
3161 : // virtual reg destination stack slot, in this case producing:
3162 : //
3163 : // STRXui %xzr, %stack.0
3164 : //
3165 172 : if (IsSpill && DstMO.isUndef() &&
3166 : TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
3167 : assert(SrcMO.getSubReg() == 0 &&
3168 : "Unexpected subreg on physical register");
3169 : const TargetRegisterClass *SpillRC;
3170 : unsigned SpillSubreg;
3171 81 : switch (DstMO.getSubReg()) {
3172 : default:
3173 : SpillRC = nullptr;
3174 : break;
3175 57 : case AArch64::sub_32:
3176 : case AArch64::ssub:
3177 57 : if (AArch64::GPR32RegClass.contains(SrcReg)) {
3178 : SpillRC = &AArch64::GPR64RegClass;
3179 : SpillSubreg = AArch64::sub_32;
3180 54 : } else if (AArch64::FPR32RegClass.contains(SrcReg)) {
3181 : SpillRC = &AArch64::FPR64RegClass;
3182 : SpillSubreg = AArch64::ssub;
3183 : } else
3184 : SpillRC = nullptr;
3185 : break;
3186 24 : case AArch64::dsub:
3187 24 : if (AArch64::FPR64RegClass.contains(SrcReg)) {
3188 : SpillRC = &AArch64::FPR128RegClass;
3189 : SpillSubreg = AArch64::dsub;
3190 : } else
3191 : SpillRC = nullptr;
3192 : break;
3193 : }
3194 :
3195 : if (SpillRC)
3196 81 : if (unsigned WidenedSrcReg =
3197 81 : TRI.getMatchingSuperReg(SrcReg, SpillSubreg, SpillRC)) {
3198 81 : storeRegToStackSlot(MBB, InsertPt, WidenedSrcReg, SrcMO.isKill(),
3199 : FrameIndex, SpillRC, &TRI);
3200 81 : return &*--InsertPt;
3201 : }
3202 : }
3203 :
3204 : // Handle cases like filling use of:
3205 : //
3206 : // %0:sub_32<def,read-undef> = COPY %1; GPR64:%0, GPR32:%1
3207 : //
3208 : // where we can load the full virtual reg source stack slot into the subreg
3209 : // destination, in this case producing:
3210 : //
3211 : // LDRWui %0:sub_32<def,read-undef>, %stack.0
3212 : //
3213 91 : if (IsFill && SrcMO.getSubReg() == 0 && DstMO.isUndef()) {
3214 : const TargetRegisterClass *FillRC;
3215 2 : switch (DstMO.getSubReg()) {
3216 : default:
3217 : FillRC = nullptr;
3218 : break;
3219 : case AArch64::sub_32:
3220 : FillRC = &AArch64::GPR32RegClass;
3221 : break;
3222 1 : case AArch64::ssub:
3223 : FillRC = &AArch64::FPR32RegClass;
3224 1 : break;
3225 0 : case AArch64::dsub:
3226 : FillRC = &AArch64::FPR64RegClass;
3227 0 : break;
3228 : }
3229 :
3230 : if (FillRC) {
3231 : assert(TRI.getRegSizeInBits(*getRegClass(SrcReg)) ==
3232 : TRI.getRegSizeInBits(*FillRC) &&
3233 : "Mismatched regclass size on folded subreg COPY");
3234 2 : loadRegFromStackSlot(MBB, InsertPt, DstReg, FrameIndex, FillRC, &TRI);
3235 : MachineInstr &LoadMI = *--InsertPt;
3236 2 : MachineOperand &LoadDst = LoadMI.getOperand(0);
3237 : assert(LoadDst.getSubReg() == 0 && "unexpected subreg on fill load");
3238 : LoadDst.setSubReg(DstMO.getSubReg());
3239 : LoadDst.setIsUndef();
3240 2 : return &LoadMI;
3241 : }
3242 : }
3243 : }
3244 :
3245 : // Cannot fold.
3246 : return nullptr;
3247 : }
3248 :
3249 8598 : int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI, int &Offset,
3250 : bool *OutUseUnscaledOp,
3251 : unsigned *OutUnscaledOp,
3252 : int *EmittableOffset) {
3253 : int Scale = 1;
3254 : bool IsSigned = false;
3255 : // ImmIdx is overridden below, case by case, when it is not 2.
3256 : unsigned ImmIdx = 2;
3257 : unsigned UnscaledOp = 0;
3258 : // Set output values in case of early exit.
3259 8598 : if (EmittableOffset)
3260 3691 : *EmittableOffset = 0;
3261 8598 : if (OutUseUnscaledOp)
3262 3691 : *OutUseUnscaledOp = false;
3263 8598 : if (OutUnscaledOp)
3264 3691 : *OutUnscaledOp = 0;
3265 17196 : switch (MI.getOpcode()) {
3266 0 : default:
3267 0 : llvm_unreachable("unhandled opcode in rewriteAArch64FrameIndex");
3268 : // Vector spills/fills can't take an immediate offset.
3269 : case AArch64::LD1Twov2d:
3270 : case AArch64::LD1Threev2d:
3271 : case AArch64::LD1Fourv2d:
3272 : case AArch64::LD1Twov1d:
3273 : case AArch64::LD1Threev1d:
3274 : case AArch64::LD1Fourv1d:
3275 : case AArch64::ST1Twov2d:
3276 : case AArch64::ST1Threev2d:
3277 : case AArch64::ST1Fourv2d:
3278 : case AArch64::ST1Twov1d:
3279 : case AArch64::ST1Threev1d:
3280 : case AArch64::ST1Fourv1d:
3281 : return AArch64FrameOffsetCannotUpdate;
3282 : case AArch64::PRFMui:
3283 : Scale = 8;
3284 : UnscaledOp = AArch64::PRFUMi;
3285 : break;
3286 1634 : case AArch64::LDRXui:
3287 : Scale = 8;
3288 : UnscaledOp = AArch64::LDURXi;
3289 1634 : break;
3290 436 : case AArch64::LDRWui:
3291 : Scale = 4;
3292 : UnscaledOp = AArch64::LDURWi;
3293 436 : break;
3294 0 : case AArch64::LDRBui:
3295 : Scale = 1;
3296 : UnscaledOp = AArch64::LDURBi;
3297 0 : break;
3298 4 : case AArch64::LDRHui:
3299 : Scale = 2;
3300 : UnscaledOp = AArch64::LDURHi;
3301 4 : break;
3302 87 : case AArch64::LDRSui:
3303 : Scale = 4;
3304 : UnscaledOp = AArch64::LDURSi;
3305 87 : break;
3306 366 : case AArch64::LDRDui:
3307 : Scale = 8;
3308 : UnscaledOp = AArch64::LDURDi;
3309 366 : break;
3310 1052 : case AArch64::LDRQui:
3311 : Scale = 16;
3312 : UnscaledOp = AArch64::LDURQi;
3313 1052 : break;
3314 47 : case AArch64::LDRBBui:
3315 : Scale = 1;
3316 : UnscaledOp = AArch64::LDURBBi;
3317 47 : break;
3318 33 : case AArch64::LDRHHui:
3319 : Scale = 2;
3320 : UnscaledOp = AArch64::LDURHHi;
3321 33 : break;
3322 0 : case AArch64::LDRSBXui:
3323 : Scale = 1;
3324 : UnscaledOp = AArch64::LDURSBXi;
3325 0 : break;
3326 15 : case AArch64::LDRSBWui:
3327 : Scale = 1;
3328 : UnscaledOp = AArch64::LDURSBWi;
3329 15 : break;
3330 0 : case AArch64::LDRSHXui:
3331 : Scale = 2;
3332 : UnscaledOp = AArch64::LDURSHXi;
3333 0 : break;
3334 12 : case AArch64::LDRSHWui:
3335 : Scale = 2;
3336 : UnscaledOp = AArch64::LDURSHWi;
3337 12 : break;
3338 6 : case AArch64::LDRSWui:
3339 : Scale = 4;
3340 : UnscaledOp = AArch64::LDURSWi;
3341 6 : break;
3342 :
3343 1995 : case AArch64::STRXui:
3344 : Scale = 8;
3345 : UnscaledOp = AArch64::STURXi;
3346 1995 : break;
3347 1006 : case AArch64::STRWui:
3348 : Scale = 4;
3349 : UnscaledOp = AArch64::STURWi;
3350 1006 : break;
3351 0 : case AArch64::STRBui:
3352 : Scale = 1;
3353 : UnscaledOp = AArch64::STURBi;
3354 0 : break;
3355 0 : case AArch64::STRHui:
3356 : Scale = 2;
3357 : UnscaledOp = AArch64::STURHi;
3358 0 : break;
3359 48 : case AArch64::STRSui:
3360 : Scale = 4;
3361 : UnscaledOp = AArch64::STURSi;
3362 48 : break;
3363 446 : case AArch64::STRDui:
3364 : Scale = 8;
3365 : UnscaledOp = AArch64::STURDi;
3366 446 : break;
3367 1271 : case AArch64::STRQui:
3368 : Scale = 16;
3369 : UnscaledOp = AArch64::STURQi;
3370 1271 : break;
3371 47 : case AArch64::STRBBui:
3372 : Scale = 1;
3373 : UnscaledOp = AArch64::STURBBi;
3374 47 : break;
3375 30 : case AArch64::STRHHui:
3376 : Scale = 2;
3377 : UnscaledOp = AArch64::STURHHi;
3378 30 : break;
3379 :
3380 6 : case AArch64::LDPXi:
3381 : case AArch64::LDPDi:
3382 : case AArch64::STPXi:
3383 : case AArch64::STPDi:
3384 : case AArch64::LDNPXi:
3385 : case AArch64::LDNPDi:
3386 : case AArch64::STNPXi:
3387 : case AArch64::STNPDi:
3388 : ImmIdx = 3;
3389 : IsSigned = true;
3390 : Scale = 8;
3391 6 : break;
3392 0 : case AArch64::LDPQi:
3393 : case AArch64::STPQi:
3394 : case AArch64::LDNPQi:
3395 : case AArch64::STNPQi:
3396 : ImmIdx = 3;
3397 : IsSigned = true;
3398 : Scale = 16;
3399 0 : break;
3400 0 : case AArch64::LDPWi:
3401 : case AArch64::LDPSi:
3402 : case AArch64::STPWi:
3403 : case AArch64::STPSi:
3404 : case AArch64::LDNPWi:
3405 : case AArch64::LDNPSi:
3406 : case AArch64::STNPWi:
3407 : case AArch64::STNPSi:
3408 : ImmIdx = 3;
3409 : IsSigned = true;
3410 : Scale = 4;
3411 0 : break;
3412 :
3413 10 : case AArch64::LDURXi:
3414 : case AArch64::LDURWi:
3415 : case AArch64::LDURBi:
3416 : case AArch64::LDURHi:
3417 : case AArch64::LDURSi:
3418 : case AArch64::LDURDi:
3419 : case AArch64::LDURQi:
3420 : case AArch64::LDURHHi:
3421 : case AArch64::LDURBBi:
3422 : case AArch64::LDURSBXi:
3423 : case AArch64::LDURSBWi:
3424 : case AArch64::LDURSHXi:
3425 : case AArch64::LDURSHWi:
3426 : case AArch64::LDURSWi:
3427 : case AArch64::STURXi:
3428 : case AArch64::STURWi:
3429 : case AArch64::STURBi:
3430 : case AArch64::STURHi:
3431 : case AArch64::STURSi:
3432 : case AArch64::STURDi:
3433 : case AArch64::STURQi:
3434 : case AArch64::STURBBi:
3435 : case AArch64::STURHHi:
3436 : Scale = 1;
3437 10 : break;
3438 : }
3439 :
3440 8575 : Offset += MI.getOperand(ImmIdx).getImm() * Scale;
3441 :
3442 : bool useUnscaledOp = false;
3443 : // If the offset doesn't match the scale, we rewrite the instruction to
3444 : // use the unscaled instruction instead. Likewise, if we have a negative
3445 : // offset (and have an unscaled op to use).
3446 8575 : if ((Offset & (Scale - 1)) != 0 || (Offset < 0 && UnscaledOp != 0))
3447 : useUnscaledOp = true;
3448 :
3449 : // Use an unscaled addressing mode if the instruction has a negative offset
3450 : // (or if the instruction is already using an unscaled addressing mode).
3451 : unsigned MaskBits;
3452 8575 : if (IsSigned) {
3453 : // ldp/stp instructions.
3454 : MaskBits = 7;
3455 6 : Offset /= Scale;
3456 8569 : } else if (UnscaledOp == 0 || useUnscaledOp) {
3457 : MaskBits = 9;
3458 : IsSigned = true;
3459 : Scale = 1;
3460 : } else {
3461 : MaskBits = 12;
3462 : IsSigned = false;
3463 8196 : Offset /= Scale;
3464 : }
3465 :
3466 : // Attempt to fold address computation.
3467 8575 : int MaxOff = (1 << (MaskBits - IsSigned)) - 1;
3468 8575 : int MinOff = (IsSigned ? (-MaxOff - 1) : 0);
3469 8575 : if (Offset >= MinOff && Offset <= MaxOff) {
3470 8398 : if (EmittableOffset)
3471 3675 : *EmittableOffset = Offset;
3472 8398 : Offset = 0;
3473 : } else {
3474 177 : int NewOff = Offset < 0 ? MinOff : MaxOff;
3475 177 : if (EmittableOffset)
3476 1 : *EmittableOffset = NewOff;
3477 177 : Offset = (Offset - NewOff) * Scale;
3478 : }
3479 8575 : if (OutUseUnscaledOp)
3480 3676 : *OutUseUnscaledOp = useUnscaledOp;
3481 8575 : if (OutUnscaledOp)
3482 3676 : *OutUnscaledOp = UnscaledOp;
3483 8575 : return AArch64FrameOffsetCanUpdate |
3484 8575 : (Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
3485 : }
3486 :
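// Illustrative sketch (simplified, register numbers made up) of how this is
// used during frame-index elimination: a spill such as
//   STRXui %x0, %stack.0, 0
// where %stack.0 resolves to [sp, #16] is rewritten to
//   STRXui %x0, %sp, 2        // scaled immediate: 2 * 8 == 16 bytes
// and rewriteAArch64FrameIndex() returns true with Offset == 0. If the full
// offset cannot be encoded, the un-encodable remainder is left in Offset for
// the caller to materialize separately.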
3487 3905 : bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
3488 : unsigned FrameReg, int &Offset,
3489 : const AArch64InstrInfo *TII) {
3490 3905 : unsigned Opcode = MI.getOpcode();
3491 3905 : unsigned ImmIdx = FrameRegIdx + 1;
3492 :
3493 3905 : if (Opcode == AArch64::ADDSXri || Opcode == AArch64::ADDXri) {
3494 214 : Offset += MI.getOperand(ImmIdx).getImm();
3495 428 : emitFrameOffset(*MI.getParent(), MI, MI.getDebugLoc(),
3496 : MI.getOperand(0).getReg(), FrameReg, Offset, TII,
3497 : MachineInstr::NoFlags, (Opcode == AArch64::ADDSXri));
3498 214 : MI.eraseFromParent();
3499 214 : Offset = 0;
3500 214 : return true;
3501 : }
3502 :
3503 : int NewOffset;
3504 : unsigned UnscaledOp;
3505 : bool UseUnscaledOp;
3506 3691 : int Status = isAArch64FrameOffsetLegal(MI, Offset, &UseUnscaledOp,
3507 : &UnscaledOp, &NewOffset);
3508 3691 : if (Status & AArch64FrameOffsetCanUpdate) {
3509 3676 : if (Status & AArch64FrameOffsetIsLegal)
3510 : // Replace the FrameIndex with FrameReg.
3511 7350 : MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false);
3512 3676 : if (UseUnscaledOp)
3513 173 : MI.setDesc(TII->get(UnscaledOp));
3514 :
3515 7352 : MI.getOperand(ImmIdx).ChangeToImmediate(NewOffset);
3516 3676 : return Offset == 0;
3517 : }
3518 :
3519 : return false;
3520 : }
3521 :
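// Note: the architectural NOP on AArch64 is an alias for HINT #0, which is
// what getNoop() emits below.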
3522 0 : void AArch64InstrInfo::getNoop(MCInst &NopInst) const {
3523 : NopInst.setOpcode(AArch64::HINT);
3524 0 : NopInst.addOperand(MCOperand::createImm(0));
3525 0 : }
3526 :
3527 : // AArch64 supports MachineCombiner.
3528 14114 : bool AArch64InstrInfo::useMachineCombiner() const { return true; }
3529 :
3530 : // True when Opc sets the NZCV condition flags.
3531 : static bool isCombineInstrSettingFlag(unsigned Opc) {
3532 2862 : switch (Opc) {
3533 : case AArch64::ADDSWrr:
3534 : case AArch64::ADDSWri:
3535 : case AArch64::ADDSXrr:
3536 : case AArch64::ADDSXri:
3537 : case AArch64::SUBSWrr:
3538 : case AArch64::SUBSXrr:
3539 : // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3540 : case AArch64::SUBSWri:
3541 : case AArch64::SUBSXri:
3542 : return true;
3543 : default:
3544 : break;
3545 : }
3546 : return false;
3547 : }
3548 :
3549 : // 32b Opcodes that can be combined with a MUL
3550 : static bool isCombineInstrCandidate32(unsigned Opc) {
3551 119915 : switch (Opc) {
3552 : case AArch64::ADDWrr:
3553 : case AArch64::ADDWri:
3554 : case AArch64::SUBWrr:
3555 : case AArch64::ADDSWrr:
3556 : case AArch64::ADDSWri:
3557 : case AArch64::SUBSWrr:
3558 : // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3559 : case AArch64::SUBWri:
3560 : case AArch64::SUBSWri:
3561 : return true;
3562 : default:
3563 : break;
3564 : }
3565 : return false;
3566 : }
3567 :
3568 : // 64b Opcodes that can be combined with a MUL
3569 : static bool isCombineInstrCandidate64(unsigned Opc) {
3570 118450 : switch (Opc) {
3571 : case AArch64::ADDXrr:
3572 : case AArch64::ADDXri:
3573 : case AArch64::SUBXrr:
3574 : case AArch64::ADDSXrr:
3575 : case AArch64::ADDSXri:
3576 : case AArch64::SUBSXrr:
3577 : // Note: MSUB Wd,Wn,Wm,Wi -> Wd = Wi - WnxWm, not Wd=WnxWm - Wi.
3578 : case AArch64::SUBXri:
3579 : case AArch64::SUBSXri:
3580 : return true;
3581 : default:
3582 : break;
3583 : }
3584 : return false;
3585 : }
3586 :
3587 : // FP Opcodes that can be combined with a FMUL
3588 119714 : static bool isCombineInstrCandidateFP(const MachineInstr &Inst) {
3589 239428 : switch (Inst.getOpcode()) {
3590 : default:
3591 119111 : break;
3592 603 : case AArch64::FADDSrr:
3593 : case AArch64::FADDDrr:
3594 : case AArch64::FADDv2f32:
3595 : case AArch64::FADDv2f64:
3596 : case AArch64::FADDv4f32:
3597 : case AArch64::FSUBSrr:
3598 : case AArch64::FSUBDrr:
3599 : case AArch64::FSUBv2f32:
3600 : case AArch64::FSUBv2f64:
3601 : case AArch64::FSUBv4f32:
3602 603 : TargetOptions Options = Inst.getParent()->getParent()->getTarget().Options;
3603 1055 : return (Options.UnsafeFPMath ||
3604 603 : Options.AllowFPOpFusion == FPOpFusion::Fast);
3605 : }
3606 119111 : return false;
3607 : }
3608 :
3609 : // Opcodes that can be combined with a MUL
3610 119915 : static bool isCombineInstrCandidate(unsigned Opc) {
3611 119915 : return (isCombineInstrCandidate32(Opc) || isCombineInstrCandidate64(Opc));
3612 : }
3613 :
3614 : //
3615 : // Utility routine that checks if \param MO is defined by a
3616 : // \param CombineOpc instruction in the basic block \param MBB.
3617 3801 : static bool canCombine(MachineBasicBlock &MBB, MachineOperand &MO,
3618 : unsigned CombineOpc, unsigned ZeroReg = 0,
3619 : bool CheckZeroReg = false) {
3620 3801 : MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
3621 : MachineInstr *MI = nullptr;
3622 :
3623 3801 : if (MO.isReg() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
3624 3641 : MI = MRI.getUniqueVRegDef(MO.getReg());
3625 : // And it needs to be in the trace (otherwise, it won't have a depth).
3626 3641 : if (!MI || MI->getParent() != &MBB || (unsigned)MI->getOpcode() != CombineOpc)
3627 3508 : return false;
3628 : // Must only used by the user we combine with.
3629 293 : if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
3630 : return false;
3631 :
3632 278 : if (CheckZeroReg) {
3633 : assert(MI->getNumOperands() >= 4 && MI->getOperand(0).isReg() &&
3634 : MI->getOperand(1).isReg() && MI->getOperand(2).isReg() &&
3635 : MI->getOperand(3).isReg() && "MAdd/MSub must have a least 4 regs");
3636 : // The third input reg must be zero.
3637 210 : if (MI->getOperand(3).getReg() != ZeroReg)
3638 8 : return false;
3639 : }
3640 :
3641 : return true;
3642 : }
3643 :
3644 : //
3645 : // Is \param MO defined by an integer multiply and can be combined?
3646 : static bool canCombineWithMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3647 : unsigned MulOpc, unsigned ZeroReg) {
3648 3082 : return canCombine(MBB, MO, MulOpc, ZeroReg, true);
3649 : }
3650 :
3651 : //
3652 : // Is \param MO defined by a floating-point multiply and can be combined?
3653 : static bool canCombineWithFMUL(MachineBasicBlock &MBB, MachineOperand &MO,
3654 : unsigned MulOpc) {
3655 719 : return canCombine(MBB, MO, MulOpc);
3656 : }
3657 :
3658 : // TODO: There are many more machine instruction opcodes to match:
3659 : // 1. Other data types (integer, vectors)
3660 : // 2. Other math / logic operations (xor, or)
3661 : // 3. Other forms of the same operation (intrinsics and other variants)
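// Note: reassociation is only safe for the FP opcodes below under UnsafeFPMath,
// since e.g. (a + b) + c need not be bit-identical to a + (b + c) once rounding
// is taken into account.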
3662 119659 : bool AArch64InstrInfo::isAssociativeAndCommutative(
3663 : const MachineInstr &Inst) const {
3664 239318 : switch (Inst.getOpcode()) {
3665 908 : case AArch64::FADDDrr:
3666 : case AArch64::FADDSrr:
3667 : case AArch64::FADDv2f32:
3668 : case AArch64::FADDv2f64:
3669 : case AArch64::FADDv4f32:
3670 : case AArch64::FMULDrr:
3671 : case AArch64::FMULSrr:
3672 : case AArch64::FMULX32:
3673 : case AArch64::FMULX64:
3674 : case AArch64::FMULXv2f32:
3675 : case AArch64::FMULXv2f64:
3676 : case AArch64::FMULXv4f32:
3677 : case AArch64::FMULv2f32:
3678 : case AArch64::FMULv2f64:
3679 : case AArch64::FMULv4f32:
3680 908 : return Inst.getParent()->getParent()->getTarget().Options.UnsafeFPMath;
3681 : default:
3682 : return false;
3683 : }
3684 : }
3685 :
3686 : /// Find instructions that can be turned into madd.
3687 119915 : static bool getMaddPatterns(MachineInstr &Root,
3688 : SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3689 119915 : unsigned Opc = Root.getOpcode();
3690 119915 : MachineBasicBlock &MBB = *Root.getParent();
3691 : bool Found = false;
3692 :
3693 119915 : if (!isCombineInstrCandidate(Opc))
3694 : return false;
3695 : if (isCombineInstrSettingFlag(Opc)) {
3696 1444 : int Cmp_NZCV = Root.findRegisterDefOperandIdx(AArch64::NZCV, true);
3697 : // When NZCV is live bail out.
3698 1444 : if (Cmp_NZCV == -1)
3699 : return false;
3700 645 : unsigned NewOpc = convertToNonFlagSettingOpc(Root);
3701 : // When opcode can't change bail out.
3702 : // CHECKME: do we miss any cases for opcode conversion?
3703 645 : if (NewOpc == Opc)
3704 : return false;
3705 : Opc = NewOpc;
3706 : }
3707 :
3708 2063 : switch (Opc) {
3709 : default:
3710 : break;
3711 298 : case AArch64::ADDWrr:
3712 : assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3713 : "ADDWrr does not have register operands");
3714 298 : if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3715 : AArch64::WZR)) {
3716 1 : Patterns.push_back(MachineCombinerPattern::MULADDW_OP1);
3717 : Found = true;
3718 : }
3719 298 : if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
3720 : AArch64::WZR)) {
3721 2 : Patterns.push_back(MachineCombinerPattern::MULADDW_OP2);
3722 : Found = true;
3723 : }
3724 : break;
3725 255 : case AArch64::ADDXrr:
3726 255 : if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3727 : AArch64::XZR)) {
3728 8 : Patterns.push_back(MachineCombinerPattern::MULADDX_OP1);
3729 : Found = true;
3730 : }
3731 255 : if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
3732 : AArch64::XZR)) {
3733 40 : Patterns.push_back(MachineCombinerPattern::MULADDX_OP2);
3734 : Found = true;
3735 : }
3736 : break;
3737 312 : case AArch64::SUBWrr:
3738 312 : if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3739 : AArch64::WZR)) {
3740 0 : Patterns.push_back(MachineCombinerPattern::MULSUBW_OP1);
3741 : Found = true;
3742 : }
3743 312 : if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDWrrr,
3744 : AArch64::WZR)) {
3745 130 : Patterns.push_back(MachineCombinerPattern::MULSUBW_OP2);
3746 : Found = true;
3747 : }
3748 : break;
3749 154 : case AArch64::SUBXrr:
3750 154 : if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3751 : AArch64::XZR)) {
3752 0 : Patterns.push_back(MachineCombinerPattern::MULSUBX_OP1);
3753 : Found = true;
3754 : }
3755 154 : if (canCombineWithMUL(MBB, Root.getOperand(2), AArch64::MADDXrrr,
3756 : AArch64::XZR)) {
3757 17 : Patterns.push_back(MachineCombinerPattern::MULSUBX_OP2);
3758 : Found = true;
3759 : }
3760 : break;
3761 231 : case AArch64::ADDWri:
3762 231 : if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3763 : AArch64::WZR)) {
3764 2 : Patterns.push_back(MachineCombinerPattern::MULADDWI_OP1);
3765 : Found = true;
3766 : }
3767 : break;
3768 600 : case AArch64::ADDXri:
3769 600 : if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3770 : AArch64::XZR)) {
3771 1 : Patterns.push_back(MachineCombinerPattern::MULADDXI_OP1);
3772 : Found = true;
3773 : }
3774 : break;
3775 79 : case AArch64::SUBWri:
3776 79 : if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDWrrr,
3777 : AArch64::WZR)) {
3778 0 : Patterns.push_back(MachineCombinerPattern::MULSUBWI_OP1);
3779 : Found = true;
3780 : }
3781 : break;
3782 134 : case AArch64::SUBXri:
3783 134 : if (canCombineWithMUL(MBB, Root.getOperand(1), AArch64::MADDXrrr,
3784 : AArch64::XZR)) {
3785 1 : Patterns.push_back(MachineCombinerPattern::MULSUBXI_OP1);
3786 : Found = true;
3787 : }
3788 : break;
3789 : }
3790 : return Found;
3791 : }
3792 : /// Floating-Point Support
3793 :
3794 : /// Find instructions that can be turned into a fused multiply-add (fmadd/fmla/fmls).
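/// For example (illustrative virtual registers), when FP contraction is
/// allowed, the sequence
///   %3 = FMULSrr %1, %2
///   %4 = FADDSrr %3, %0
/// matches FMULADDS_OP1 and can be rewritten as
///   %4 = FMADDSrrr %1, %2, %0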
3795 119714 : static bool getFMAPatterns(MachineInstr &Root,
3796 : SmallVectorImpl<MachineCombinerPattern> &Patterns) {
3797 :
3798 119714 : if (!isCombineInstrCandidateFP(Root))
3799 : return false;
3800 :
3801 183 : MachineBasicBlock &MBB = *Root.getParent();
3802 : bool Found = false;
3803 :
3804 366 : switch (Root.getOpcode()) {
3805 : default:
3806 : assert(false && "Unsupported FP instruction in combiner\n");
3807 : break;
3808 55 : case AArch64::FADDSrr:
3809 : assert(Root.getOperand(1).isReg() && Root.getOperand(2).isReg() &&
3810 : "FADDWrr does not have register operands");
3811 55 : if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
3812 1 : Patterns.push_back(MachineCombinerPattern::FMULADDS_OP1);
3813 : Found = true;
3814 54 : } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3815 : AArch64::FMULv1i32_indexed)) {
3816 1 : Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP1);
3817 : Found = true;
3818 : }
3819 55 : if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
3820 0 : Patterns.push_back(MachineCombinerPattern::FMULADDS_OP2);
3821 : Found = true;
3822 55 : } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3823 : AArch64::FMULv1i32_indexed)) {
3824 0 : Patterns.push_back(MachineCombinerPattern::FMLAv1i32_indexed_OP2);
3825 : Found = true;
3826 : }
3827 : break;
3828 30 : case AArch64::FADDDrr:
3829 30 : if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
3830 1 : Patterns.push_back(MachineCombinerPattern::FMULADDD_OP1);
3831 : Found = true;
3832 29 : } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3833 : AArch64::FMULv1i64_indexed)) {
3834 1 : Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP1);
3835 : Found = true;
3836 : }
3837 30 : if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
3838 2 : Patterns.push_back(MachineCombinerPattern::FMULADDD_OP2);
3839 : Found = true;
3840 28 : } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3841 : AArch64::FMULv1i64_indexed)) {
3842 0 : Patterns.push_back(MachineCombinerPattern::FMLAv1i64_indexed_OP2);
3843 : Found = true;
3844 : }
3845 : break;
3846 5 : case AArch64::FADDv2f32:
3847 5 : if (canCombineWithFMUL(MBB, Root.getOperand(1),
3848 : AArch64::FMULv2i32_indexed)) {
3849 1 : Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP1);
3850 : Found = true;
3851 4 : } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3852 : AArch64::FMULv2f32)) {
3853 1 : Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP1);
3854 : Found = true;
3855 : }
3856 5 : if (canCombineWithFMUL(MBB, Root.getOperand(2),
3857 : AArch64::FMULv2i32_indexed)) {
3858 0 : Patterns.push_back(MachineCombinerPattern::FMLAv2i32_indexed_OP2);
3859 : Found = true;
3860 5 : } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3861 : AArch64::FMULv2f32)) {
3862 0 : Patterns.push_back(MachineCombinerPattern::FMLAv2f32_OP2);
3863 : Found = true;
3864 : }
3865 : break;
3866 10 : case AArch64::FADDv2f64:
3867 10 : if (canCombineWithFMUL(MBB, Root.getOperand(1),
3868 : AArch64::FMULv2i64_indexed)) {
3869 1 : Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP1);
3870 : Found = true;
3871 9 : } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3872 : AArch64::FMULv2f64)) {
3873 1 : Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP1);
3874 : Found = true;
3875 : }
3876 10 : if (canCombineWithFMUL(MBB, Root.getOperand(2),
3877 : AArch64::FMULv2i64_indexed)) {
3878 0 : Patterns.push_back(MachineCombinerPattern::FMLAv2i64_indexed_OP2);
3879 : Found = true;
3880 10 : } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3881 : AArch64::FMULv2f64)) {
3882 0 : Patterns.push_back(MachineCombinerPattern::FMLAv2f64_OP2);
3883 : Found = true;
3884 : }
3885 : break;
3886 31 : case AArch64::FADDv4f32:
3887 31 : if (canCombineWithFMUL(MBB, Root.getOperand(1),
3888 : AArch64::FMULv4i32_indexed)) {
3889 1 : Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP1);
3890 : Found = true;
3891 30 : } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3892 : AArch64::FMULv4f32)) {
3893 1 : Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP1);
3894 : Found = true;
3895 : }
3896 31 : if (canCombineWithFMUL(MBB, Root.getOperand(2),
3897 : AArch64::FMULv4i32_indexed)) {
3898 0 : Patterns.push_back(MachineCombinerPattern::FMLAv4i32_indexed_OP2);
3899 : Found = true;
3900 31 : } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3901 : AArch64::FMULv4f32)) {
3902 0 : Patterns.push_back(MachineCombinerPattern::FMLAv4f32_OP2);
3903 : Found = true;
3904 : }
3905 : break;
3906 :
3907 6 : case AArch64::FSUBSrr:
3908 6 : if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULSrr)) {
3909 0 : Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP1);
3910 : Found = true;
3911 : }
3912 6 : if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULSrr)) {
3913 0 : Patterns.push_back(MachineCombinerPattern::FMULSUBS_OP2);
3914 : Found = true;
3915 6 : } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3916 : AArch64::FMULv1i32_indexed)) {
3917 2 : Patterns.push_back(MachineCombinerPattern::FMLSv1i32_indexed_OP2);
3918 : Found = true;
3919 : }
3920 6 : if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULSrr)) {
3921 2 : Patterns.push_back(MachineCombinerPattern::FNMULSUBS_OP1);
3922 : Found = true;
3923 : }
3924 : break;
3925 4 : case AArch64::FSUBDrr:
3926 4 : if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FMULDrr)) {
3927 0 : Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP1);
3928 : Found = true;
3929 : }
3930 4 : if (canCombineWithFMUL(MBB, Root.getOperand(2), AArch64::FMULDrr)) {
3931 0 : Patterns.push_back(MachineCombinerPattern::FMULSUBD_OP2);
3932 : Found = true;
3933 4 : } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3934 : AArch64::FMULv1i64_indexed)) {
3935 2 : Patterns.push_back(MachineCombinerPattern::FMLSv1i64_indexed_OP2);
3936 : Found = true;
3937 : }
3938 4 : if (canCombineWithFMUL(MBB, Root.getOperand(1), AArch64::FNMULDrr)) {
3939 2 : Patterns.push_back(MachineCombinerPattern::FNMULSUBD_OP1);
3940 : Found = true;
3941 : }
3942 : break;
3943 14 : case AArch64::FSUBv2f32:
3944 14 : if (canCombineWithFMUL(MBB, Root.getOperand(2),
3945 : AArch64::FMULv2i32_indexed)) {
3946 2 : Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP2);
3947 : Found = true;
3948 12 : } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3949 : AArch64::FMULv2f32)) {
3950 6 : Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP2);
3951 : Found = true;
3952 : }
3953 14 : if (canCombineWithFMUL(MBB, Root.getOperand(1),
3954 : AArch64::FMULv2i32_indexed)) {
3955 0 : Patterns.push_back(MachineCombinerPattern::FMLSv2i32_indexed_OP1);
3956 : Found = true;
3957 14 : } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3958 : AArch64::FMULv2f32)) {
3959 8 : Patterns.push_back(MachineCombinerPattern::FMLSv2f32_OP1);
3960 : Found = true;
3961 : }
3962 : break;
3963 16 : case AArch64::FSUBv2f64:
3964 16 : if (canCombineWithFMUL(MBB, Root.getOperand(2),
3965 : AArch64::FMULv2i64_indexed)) {
3966 2 : Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP2);
3967 : Found = true;
3968 14 : } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3969 : AArch64::FMULv2f64)) {
3970 6 : Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP2);
3971 : Found = true;
3972 : }
3973 16 : if (canCombineWithFMUL(MBB, Root.getOperand(1),
3974 : AArch64::FMULv2i64_indexed)) {
3975 0 : Patterns.push_back(MachineCombinerPattern::FMLSv2i64_indexed_OP1);
3976 : Found = true;
3977 16 : } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3978 : AArch64::FMULv2f64)) {
3979 8 : Patterns.push_back(MachineCombinerPattern::FMLSv2f64_OP1);
3980 : Found = true;
3981 : }
3982 : break;
3983 12 : case AArch64::FSUBv4f32:
3984 12 : if (canCombineWithFMUL(MBB, Root.getOperand(2),
3985 : AArch64::FMULv4i32_indexed)) {
3986 2 : Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP2);
3987 : Found = true;
3988 10 : } else if (canCombineWithFMUL(MBB, Root.getOperand(2),
3989 : AArch64::FMULv4f32)) {
3990 6 : Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP2);
3991 : Found = true;
3992 : }
3993 12 : if (canCombineWithFMUL(MBB, Root.getOperand(1),
3994 : AArch64::FMULv4i32_indexed)) {
3995 0 : Patterns.push_back(MachineCombinerPattern::FMLSv4i32_indexed_OP1);
3996 : Found = true;
3997 12 : } else if (canCombineWithFMUL(MBB, Root.getOperand(1),
3998 : AArch64::FMULv4f32)) {
3999 8 : Patterns.push_back(MachineCombinerPattern::FMLSv4f32_OP1);
4000 : Found = true;
4001 : }
4002 : break;
4003 : }
4004 : return Found;
4005 : }
4006 :
4007 : /// Return true when a code sequence can improve throughput. It
4008 : /// should be called only for instructions in loops.
4009 : /// \param Pattern - combiner pattern
4010 39 : bool AArch64InstrInfo::isThroughputPattern(
4011 : MachineCombinerPattern Pattern) const {
4012 : switch (Pattern) {
4013 : default:
4014 : break;
4015 : case MachineCombinerPattern::FMULADDS_OP1:
4016 : case MachineCombinerPattern::FMULADDS_OP2:
4017 : case MachineCombinerPattern::FMULSUBS_OP1:
4018 : case MachineCombinerPattern::FMULSUBS_OP2:
4019 : case MachineCombinerPattern::FMULADDD_OP1:
4020 : case MachineCombinerPattern::FMULADDD_OP2:
4021 : case MachineCombinerPattern::FMULSUBD_OP1:
4022 : case MachineCombinerPattern::FMULSUBD_OP2:
4023 : case MachineCombinerPattern::FNMULSUBS_OP1:
4024 : case MachineCombinerPattern::FNMULSUBD_OP1:
4025 : case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
4026 : case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
4027 : case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
4028 : case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
4029 : case MachineCombinerPattern::FMLAv2f32_OP2:
4030 : case MachineCombinerPattern::FMLAv2f32_OP1:
4031 : case MachineCombinerPattern::FMLAv2f64_OP1:
4032 : case MachineCombinerPattern::FMLAv2f64_OP2:
4033 : case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
4034 : case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
4035 : case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
4036 : case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
4037 : case MachineCombinerPattern::FMLAv4f32_OP1:
4038 : case MachineCombinerPattern::FMLAv4f32_OP2:
4039 : case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
4040 : case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
4041 : case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
4042 : case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
4043 : case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
4044 : case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
4045 : case MachineCombinerPattern::FMLSv2f32_OP2:
4046 : case MachineCombinerPattern::FMLSv2f64_OP2:
4047 : case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
4048 : case MachineCombinerPattern::FMLSv4f32_OP2:
4049 : return true;
4050 : } // end switch (Pattern)
4051 : return false;
4052 : }
4053 : /// Return true when there is potentially a faster code sequence for an
4054 : /// instruction chain ending in \p Root. All potential patterns are listed in
4055 : /// the \p Pattern vector. Pattern should be sorted in priority order since the
4056 : /// pattern evaluator stops checking as soon as it finds a faster sequence.
4057 :
4058 119915 : bool AArch64InstrInfo::getMachineCombinerPatterns(
4059 : MachineInstr &Root,
4060 : SmallVectorImpl<MachineCombinerPattern> &Patterns) const {
4061 : // Integer patterns
4062 119915 : if (getMaddPatterns(Root, Patterns))
4063 : return true;
4064 : // Floating point patterns
4065 119714 : if (getFMAPatterns(Root, Patterns))
4066 : return true;
4067 :
4068 119659 : return TargetInstrInfo::getMachineCombinerPatterns(Root, Patterns);
4069 : }
4070 :
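// Operand layout of the fused instruction built by genFusedMultiply, roughly:
//   Default     - MADD/FMADD style:      dst, mul0, mul1, addend  (dst = mul0 * mul1 + addend)
//   Indexed     - FMLA/FMLS by element:  dst, accumulator, mul0, mul1, lane
//   Accumulator - vector FMLA/FMLS:      dst, accumulator, mul0, mul1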
4071 : enum class FMAInstKind { Default, Indexed, Accumulator };
4072 : /// genFusedMultiply - Generate fused multiply instructions.
4073 : /// This function supports both integer and floating point instructions.
4074 : /// A typical example:
4075 : /// F|MUL I=A,B,0
4076 : /// F|ADD R,I,C
4077 : /// ==> F|MADD R,A,B,C
4078 : /// \param MF Containing MachineFunction
4079 : /// \param MRI Register information
4080 : /// \param TII Target information
4081 : /// \param Root is the F|ADD instruction
4082 : /// \param [out] InsInstrs is a vector of machine instructions and will
4083 : /// contain the generated madd instruction
4084 : /// \param IdxMulOpd is index of operand in Root that is the result of
4085 : /// the F|MUL. In the example above IdxMulOpd is 1.
4086 : /// \param MaddOpc the opcode of the f|madd instruction
4087 : /// \param RC Register class of operands
4088 : /// \param kind The kind of fma instruction (addressing mode) to be generated
4089 : /// \param ReplacedAddend is the result register from the instruction
4090 : /// replacing the non-combined operand, if any.
4091 : static MachineInstr *
4092 290 : genFusedMultiply(MachineFunction &MF, MachineRegisterInfo &MRI,
4093 : const TargetInstrInfo *TII, MachineInstr &Root,
4094 : SmallVectorImpl<MachineInstr *> &InsInstrs, unsigned IdxMulOpd,
4095 : unsigned MaddOpc, const TargetRegisterClass *RC,
4096 : FMAInstKind kind = FMAInstKind::Default,
4097 : const unsigned *ReplacedAddend = nullptr) {
4098 : assert(IdxMulOpd == 1 || IdxMulOpd == 2);
4099 :
4100 290 : unsigned IdxOtherOpd = IdxMulOpd == 1 ? 2 : 1;
4101 580 : MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
4102 290 : unsigned ResultReg = Root.getOperand(0).getReg();
4103 290 : unsigned SrcReg0 = MUL->getOperand(1).getReg();
4104 : bool Src0IsKill = MUL->getOperand(1).isKill();
4105 290 : unsigned SrcReg1 = MUL->getOperand(2).getReg();
4106 : bool Src1IsKill = MUL->getOperand(2).isKill();
4107 :
4108 : unsigned SrcReg2;
4109 : bool Src2IsKill;
4110 290 : if (ReplacedAddend) {
4111 : // If we just generated a new addend, we must be its only use.
4112 36 : SrcReg2 = *ReplacedAddend;
4113 : Src2IsKill = true;
4114 : } else {
4115 254 : SrcReg2 = Root.getOperand(IdxOtherOpd).getReg();
4116 : Src2IsKill = Root.getOperand(IdxOtherOpd).isKill();
4117 : }
4118 :
4119 290 : if (TargetRegisterInfo::isVirtualRegister(ResultReg))
4120 290 : MRI.constrainRegClass(ResultReg, RC);
4121 290 : if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
4122 290 : MRI.constrainRegClass(SrcReg0, RC);
4123 290 : if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
4124 290 : MRI.constrainRegClass(SrcReg1, RC);
4125 290 : if (TargetRegisterInfo::isVirtualRegister(SrcReg2))
4126 290 : MRI.constrainRegClass(SrcReg2, RC);
4127 :
4128 : MachineInstrBuilder MIB;
4129 290 : if (kind == FMAInstKind::Default)
4130 618 : MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4131 206 : .addReg(SrcReg0, getKillRegState(Src0IsKill))
4132 206 : .addReg(SrcReg1, getKillRegState(Src1IsKill))
4133 206 : .addReg(SrcReg2, getKillRegState(Src2IsKill));
4134 84 : else if (kind == FMAInstKind::Indexed)
4135 45 : MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4136 15 : .addReg(SrcReg2, getKillRegState(Src2IsKill))
4137 15 : .addReg(SrcReg0, getKillRegState(Src0IsKill))
4138 15 : .addReg(SrcReg1, getKillRegState(Src1IsKill))
4139 15 : .addImm(MUL->getOperand(3).getImm());
4140 69 : else if (kind == FMAInstKind::Accumulator)
4141 207 : MIB = BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4142 69 : .addReg(SrcReg2, getKillRegState(Src2IsKill))
4143 69 : .addReg(SrcReg0, getKillRegState(Src0IsKill))
4144 69 : .addReg(SrcReg1, getKillRegState(Src1IsKill));
4145 : else
4146 : assert(false && "Invalid FMA instruction kind \n");
4147 : // Insert the MADD (MADD, FMA, FMS, FMLA, FMLS)
4148 290 : InsInstrs.push_back(MIB);
4149 290 : return MUL;
4150 : }
4151 :
4152 : /// genMaddR - Generate madd instruction and combine mul and add using
4153 : /// an extra virtual register
4154 : /// Example - an ADD intermediate needs to be stored in a register:
4155 : /// MUL I=A,B,0
4156 : /// ADD R,I,Imm
4157 : /// ==> ORR V, ZR, Imm
4158 : /// ==> MADD R,A,B,V
4159 : /// \param MF Containing MachineFunction
4160 : /// \param MRI Register information
4161 : /// \param TII Target information
4162 : /// \param Root is the ADD instruction
4163 : /// \param [out] InsInstrs is a vector of machine instructions and will
4164 : /// contain the generated madd instruction
4165 : /// \param IdxMulOpd is index of operand in Root that is the result of
4166 : /// the MUL. In the example above IdxMulOpd is 1.
4167 : /// \param MaddOpc the opcode of the madd instruction
4168 : /// \param VR is a virtual register that holds the value of an ADD operand
4169 : /// (V in the example above).
4170 : /// \param RC Register class of operands
4171 3 : static MachineInstr *genMaddR(MachineFunction &MF, MachineRegisterInfo &MRI,
4172 : const TargetInstrInfo *TII, MachineInstr &Root,
4173 : SmallVectorImpl<MachineInstr *> &InsInstrs,
4174 : unsigned IdxMulOpd, unsigned MaddOpc, unsigned VR,
4175 : const TargetRegisterClass *RC) {
4176 : assert(IdxMulOpd == 1 || IdxMulOpd == 2);
4177 :
4178 6 : MachineInstr *MUL = MRI.getUniqueVRegDef(Root.getOperand(IdxMulOpd).getReg());
4179 3 : unsigned ResultReg = Root.getOperand(0).getReg();
4180 3 : unsigned SrcReg0 = MUL->getOperand(1).getReg();
4181 : bool Src0IsKill = MUL->getOperand(1).isKill();
4182 3 : unsigned SrcReg1 = MUL->getOperand(2).getReg();
4183 : bool Src1IsKill = MUL->getOperand(2).isKill();
4184 :
4185 3 : if (TargetRegisterInfo::isVirtualRegister(ResultReg))
4186 3 : MRI.constrainRegClass(ResultReg, RC);
4187 3 : if (TargetRegisterInfo::isVirtualRegister(SrcReg0))
4188 3 : MRI.constrainRegClass(SrcReg0, RC);
4189 3 : if (TargetRegisterInfo::isVirtualRegister(SrcReg1))
4190 3 : MRI.constrainRegClass(SrcReg1, RC);
4191 3 : if (TargetRegisterInfo::isVirtualRegister(VR))
4192 3 : MRI.constrainRegClass(VR, RC);
4193 :
4194 : MachineInstrBuilder MIB =
4195 6 : BuildMI(MF, Root.getDebugLoc(), TII->get(MaddOpc), ResultReg)
4196 3 : .addReg(SrcReg0, getKillRegState(Src0IsKill))
4197 3 : .addReg(SrcReg1, getKillRegState(Src1IsKill))
4198 3 : .addReg(VR);
4199 : // Insert the MADD
4200 3 : InsInstrs.push_back(MIB);
4201 3 : return MUL;
4202 : }
4203 :
4204 : /// When getMachineCombinerPatterns() finds potential patterns,
4205 : /// this function generates the instructions that could replace the
4206 : /// original code sequence.
4207 476 : void AArch64InstrInfo::genAlternativeCodeSequence(
4208 : MachineInstr &Root, MachineCombinerPattern Pattern,
4209 : SmallVectorImpl<MachineInstr *> &InsInstrs,
4210 : SmallVectorImpl<MachineInstr *> &DelInstrs,
4211 : DenseMap<unsigned, unsigned> &InstrIdxForVirtReg) const {
4212 476 : MachineBasicBlock &MBB = *Root.getParent();
4213 476 : MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
4214 : MachineFunction &MF = *MBB.getParent();
4215 476 : const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
4216 :
4217 : MachineInstr *MUL;
4218 : const TargetRegisterClass *RC;
4219 : unsigned Opc;
4220 476 : switch (Pattern) {
4221 182 : default:
4222 : // Reassociate instructions.
4223 182 : TargetInstrInfo::genAlternativeCodeSequence(Root, Pattern, InsInstrs,
4224 : DelInstrs, InstrIdxForVirtReg);
4225 182 : return;
4226 9 : case MachineCombinerPattern::MULADDW_OP1:
4227 : case MachineCombinerPattern::MULADDX_OP1:
4228 : // MUL I=A,B,0
4229 : // ADD R,I,C
4230 : // ==> MADD R,A,B,C
4231 : // --- Create(MADD);
4232 9 : if (Pattern == MachineCombinerPattern::MULADDW_OP1) {
4233 : Opc = AArch64::MADDWrrr;
4234 : RC = &AArch64::GPR32RegClass;
4235 : } else {
4236 : Opc = AArch64::MADDXrrr;
4237 : RC = &AArch64::GPR64RegClass;
4238 : }
4239 9 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4240 9 : break;
4241 42 : case MachineCombinerPattern::MULADDW_OP2:
4242 : case MachineCombinerPattern::MULADDX_OP2:
4243 : // MUL I=A,B,0
4244 : // ADD R,C,I
4245 : // ==> MADD R,A,B,C
4246 : // --- Create(MADD);
4247 42 : if (Pattern == MachineCombinerPattern::MULADDW_OP2) {
4248 : Opc = AArch64::MADDWrrr;
4249 : RC = &AArch64::GPR32RegClass;
4250 : } else {
4251 : Opc = AArch64::MADDXrrr;
4252 : RC = &AArch64::GPR64RegClass;
4253 : }
4254 42 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4255 42 : break;
4256 3 : case MachineCombinerPattern::MULADDWI_OP1:
4257 : case MachineCombinerPattern::MULADDXI_OP1: {
4258 : // MUL I=A,B,0
4259 : // ADD R,I,Imm
4260 : // ==> ORR V, ZR, Imm
4261 : // ==> MADD R,A,B,V
4262 : // --- Create(MADD);
4263 : const TargetRegisterClass *OrrRC;
4264 : unsigned BitSize, OrrOpc, ZeroReg;
4265 3 : if (Pattern == MachineCombinerPattern::MULADDWI_OP1) {
4266 : OrrOpc = AArch64::ORRWri;
4267 : OrrRC = &AArch64::GPR32spRegClass;
4268 : BitSize = 32;
4269 : ZeroReg = AArch64::WZR;
4270 : Opc = AArch64::MADDWrrr;
4271 : RC = &AArch64::GPR32RegClass;
4272 : } else {
4273 : OrrOpc = AArch64::ORRXri;
4274 : OrrRC = &AArch64::GPR64spRegClass;
4275 : BitSize = 64;
4276 : ZeroReg = AArch64::XZR;
4277 : Opc = AArch64::MADDXrrr;
4278 : RC = &AArch64::GPR64RegClass;
4279 : }
4280 3 : unsigned NewVR = MRI.createVirtualRegister(OrrRC);
4281 3 : uint64_t Imm = Root.getOperand(2).getImm();
4282 :
4283 3 : if (Root.getOperand(3).isImm()) {
4284 3 : unsigned Val = Root.getOperand(3).getImm();
4285 3 : Imm = Imm << Val;
4286 : }
4287 3 : uint64_t UImm = SignExtend64(Imm, BitSize);
4288 : uint64_t Encoding;
4289 3 : if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4290 : MachineInstrBuilder MIB1 =
4291 4 : BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4292 2 : .addReg(ZeroReg)
4293 2 : .addImm(Encoding);
4294 2 : InsInstrs.push_back(MIB1);
4295 2 : InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4296 2 : MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4297 : }
4298 : break;
4299 : }
4300 0 : case MachineCombinerPattern::MULSUBW_OP1:
4301 : case MachineCombinerPattern::MULSUBX_OP1: {
4302 : // MUL I=A,B,0
4303 : // SUB R,I, C
4304 : // ==> SUB V, 0, C
4305 : // ==> MADD R,A,B,V // = -C + A*B
4306 : // --- Create(MADD);
4307 : const TargetRegisterClass *SubRC;
4308 : unsigned SubOpc, ZeroReg;
4309 0 : if (Pattern == MachineCombinerPattern::MULSUBW_OP1) {
4310 : SubOpc = AArch64::SUBWrr;
4311 : SubRC = &AArch64::GPR32spRegClass;
4312 : ZeroReg = AArch64::WZR;
4313 : Opc = AArch64::MADDWrrr;
4314 : RC = &AArch64::GPR32RegClass;
4315 : } else {
4316 : SubOpc = AArch64::SUBXrr;
4317 : SubRC = &AArch64::GPR64spRegClass;
4318 : ZeroReg = AArch64::XZR;
4319 : Opc = AArch64::MADDXrrr;
4320 : RC = &AArch64::GPR64RegClass;
4321 : }
4322 0 : unsigned NewVR = MRI.createVirtualRegister(SubRC);
4323 : // SUB NewVR, 0, C
4324 : MachineInstrBuilder MIB1 =
4325 0 : BuildMI(MF, Root.getDebugLoc(), TII->get(SubOpc), NewVR)
4326 0 : .addReg(ZeroReg)
4327 0 : .add(Root.getOperand(2));
4328 0 : InsInstrs.push_back(MIB1);
4329 0 : InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4330 0 : MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4331 : break;
4332 : }
4333 147 : case MachineCombinerPattern::MULSUBW_OP2:
4334 : case MachineCombinerPattern::MULSUBX_OP2:
4335 : // MUL I=A,B,0
4336 : // SUB R,C,I
4337 : // ==> MSUB R,A,B,C (computes C - A*B)
4338 : // --- Create(MSUB);
4339 147 : if (Pattern == MachineCombinerPattern::MULSUBW_OP2) {
4340 : Opc = AArch64::MSUBWrrr;
4341 : RC = &AArch64::GPR32RegClass;
4342 : } else {
4343 : Opc = AArch64::MSUBXrrr;
4344 : RC = &AArch64::GPR64RegClass;
4345 : }
4346 147 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4347 147 : break;
4348 1 : case MachineCombinerPattern::MULSUBWI_OP1:
4349 : case MachineCombinerPattern::MULSUBXI_OP1: {
4350 : // MUL I=A,B,0
4351 : // SUB R,I, Imm
4352 : // ==> ORR V, ZR, -Imm
4353 : // ==> MADD R,A,B,V // = -Imm + A*B
4354 : // --- Create(MADD);
4355 : const TargetRegisterClass *OrrRC;
4356 : unsigned BitSize, OrrOpc, ZeroReg;
4357 1 : if (Pattern == MachineCombinerPattern::MULSUBWI_OP1) {
4358 : OrrOpc = AArch64::ORRWri;
4359 : OrrRC = &AArch64::GPR32spRegClass;
4360 : BitSize = 32;
4361 : ZeroReg = AArch64::WZR;
4362 : Opc = AArch64::MADDWrrr;
4363 : RC = &AArch64::GPR32RegClass;
4364 : } else {
4365 : OrrOpc = AArch64::ORRXri;
4366 : OrrRC = &AArch64::GPR64spRegClass;
4367 : BitSize = 64;
4368 : ZeroReg = AArch64::XZR;
4369 : Opc = AArch64::MADDXrrr;
4370 : RC = &AArch64::GPR64RegClass;
4371 : }
4372 1 : unsigned NewVR = MRI.createVirtualRegister(OrrRC);
4373 1 : uint64_t Imm = Root.getOperand(2).getImm();
4374 1 : if (Root.getOperand(3).isImm()) {
4375 1 : unsigned Val = Root.getOperand(3).getImm();
4376 1 : Imm = Imm << Val;
4377 : }
4378 1 : uint64_t UImm = SignExtend64(-Imm, BitSize);
4379 : uint64_t Encoding;
4380 1 : if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
4381 : MachineInstrBuilder MIB1 =
4382 2 : BuildMI(MF, Root.getDebugLoc(), TII->get(OrrOpc), NewVR)
4383 1 : .addReg(ZeroReg)
4384 1 : .addImm(Encoding);
4385 1 : InsInstrs.push_back(MIB1);
4386 1 : InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4387 1 : MUL = genMaddR(MF, MRI, TII, Root, InsInstrs, 1, Opc, NewVR, RC);
4388 : }
4389 : break;
4390 : }
4391 : // Floating Point Support
4392 2 : case MachineCombinerPattern::FMULADDS_OP1:
4393 : case MachineCombinerPattern::FMULADDD_OP1:
4394 : // MUL I=A,B,0
4395 : // ADD R,I,C
4396 : // ==> MADD R,A,B,C
4397 : // --- Create(MADD);
4398 2 : if (Pattern == MachineCombinerPattern::FMULADDS_OP1) {
4399 : Opc = AArch64::FMADDSrrr;
4400 : RC = &AArch64::FPR32RegClass;
4401 : } else {
4402 : Opc = AArch64::FMADDDrrr;
4403 : RC = &AArch64::FPR64RegClass;
4404 : }
4405 2 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4406 2 : break;
4407 2 : case MachineCombinerPattern::FMULADDS_OP2:
4408 : case MachineCombinerPattern::FMULADDD_OP2:
4409 : // FMUL I=A,B,0
4410 : // FADD R,C,I
4411 : // ==> FMADD R,A,B,C
4412 : // --- Create(FMADD);
4413 2 : if (Pattern == MachineCombinerPattern::FMULADDS_OP2) {
4414 : Opc = AArch64::FMADDSrrr;
4415 : RC = &AArch64::FPR32RegClass;
4416 : } else {
4417 : Opc = AArch64::FMADDDrrr;
4418 : RC = &AArch64::FPR64RegClass;
4419 : }
4420 2 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4421 2 : break;
4422 :
4423 1 : case MachineCombinerPattern::FMLAv1i32_indexed_OP1:
4424 : Opc = AArch64::FMLAv1i32_indexed;
4425 : RC = &AArch64::FPR32RegClass;
4426 1 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4427 : FMAInstKind::Indexed);
4428 1 : break;
4429 0 : case MachineCombinerPattern::FMLAv1i32_indexed_OP2:
4430 : Opc = AArch64::FMLAv1i32_indexed;
4431 : RC = &AArch64::FPR32RegClass;
4432 0 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4433 : FMAInstKind::Indexed);
4434 0 : break;
4435 :
4436 1 : case MachineCombinerPattern::FMLAv1i64_indexed_OP1:
4437 : Opc = AArch64::FMLAv1i64_indexed;
4438 : RC = &AArch64::FPR64RegClass;
4439 1 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4440 : FMAInstKind::Indexed);
4441 1 : break;
4442 0 : case MachineCombinerPattern::FMLAv1i64_indexed_OP2:
4443 : Opc = AArch64::FMLAv1i64_indexed;
4444 : RC = &AArch64::FPR64RegClass;
4445 0 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4446 : FMAInstKind::Indexed);
4447 0 : break;
4448 :
4449 2 : case MachineCombinerPattern::FMLAv2i32_indexed_OP1:
4450 : case MachineCombinerPattern::FMLAv2f32_OP1:
4451 : RC = &AArch64::FPR64RegClass;
4452 2 : if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP1) {
4453 : Opc = AArch64::FMLAv2i32_indexed;
4454 1 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4455 : FMAInstKind::Indexed);
4456 : } else {
4457 : Opc = AArch64::FMLAv2f32;
4458 1 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4459 : FMAInstKind::Accumulator);
4460 : }
4461 : break;
4462 0 : case MachineCombinerPattern::FMLAv2i32_indexed_OP2:
4463 : case MachineCombinerPattern::FMLAv2f32_OP2:
4464 : RC = &AArch64::FPR64RegClass;
4465 0 : if (Pattern == MachineCombinerPattern::FMLAv2i32_indexed_OP2) {
4466 : Opc = AArch64::FMLAv2i32_indexed;
4467 0 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4468 : FMAInstKind::Indexed);
4469 : } else {
4470 : Opc = AArch64::FMLAv2f32;
4471 0 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4472 : FMAInstKind::Accumulator);
4473 : }
4474 : break;
4475 :
4476 2 : case MachineCombinerPattern::FMLAv2i64_indexed_OP1:
4477 : case MachineCombinerPattern::FMLAv2f64_OP1:
4478 : RC = &AArch64::FPR128RegClass;
4479 2 : if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP1) {
4480 : Opc = AArch64::FMLAv2i64_indexed;
4481 1 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4482 : FMAInstKind::Indexed);
4483 : } else {
4484 : Opc = AArch64::FMLAv2f64;
4485 1 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4486 : FMAInstKind::Accumulator);
4487 : }
4488 : break;
4489 0 : case MachineCombinerPattern::FMLAv2i64_indexed_OP2:
4490 : case MachineCombinerPattern::FMLAv2f64_OP2:
4491 : RC = &AArch64::FPR128RegClass;
4492 0 : if (Pattern == MachineCombinerPattern::FMLAv2i64_indexed_OP2) {
4493 : Opc = AArch64::FMLAv2i64_indexed;
4494 0 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4495 : FMAInstKind::Indexed);
4496 : } else {
4497 : Opc = AArch64::FMLAv2f64;
4498 0 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4499 : FMAInstKind::Accumulator);
4500 : }
4501 : break;
4502 :
4503 2 : case MachineCombinerPattern::FMLAv4i32_indexed_OP1:
4504 : case MachineCombinerPattern::FMLAv4f32_OP1:
4505 : RC = &AArch64::FPR128RegClass;
4506 2 : if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP1) {
4507 : Opc = AArch64::FMLAv4i32_indexed;
4508 1 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4509 : FMAInstKind::Indexed);
4510 : } else {
4511 : Opc = AArch64::FMLAv4f32;
4512 1 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4513 : FMAInstKind::Accumulator);
4514 : }
4515 : break;
4516 :
4517 0 : case MachineCombinerPattern::FMLAv4i32_indexed_OP2:
4518 : case MachineCombinerPattern::FMLAv4f32_OP2:
4519 : RC = &AArch64::FPR128RegClass;
4520 0 : if (Pattern == MachineCombinerPattern::FMLAv4i32_indexed_OP2) {
4521 : Opc = AArch64::FMLAv4i32_indexed;
4522 0 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4523 : FMAInstKind::Indexed);
4524 : } else {
4525 : Opc = AArch64::FMLAv4f32;
4526 0 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4527 : FMAInstKind::Accumulator);
4528 : }
4529 : break;
4530 :
4531 0 : case MachineCombinerPattern::FMULSUBS_OP1:
4532 : case MachineCombinerPattern::FMULSUBD_OP1: {
4533 : // FMUL I=A,B,0
4534 : // FSUB R,I,C
4535 : // ==> FNMSUB R,A,B,C // = -C + A*B
4536 : // --- Create(FNMSUB);
4537 0 : if (Pattern == MachineCombinerPattern::FMULSUBS_OP1) {
4538 : Opc = AArch64::FNMSUBSrrr;
4539 : RC = &AArch64::FPR32RegClass;
4540 : } else {
4541 : Opc = AArch64::FNMSUBDrrr;
4542 : RC = &AArch64::FPR64RegClass;
4543 : }
4544 0 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4545 0 : break;
4546 : }
4547 :
4548 4 : case MachineCombinerPattern::FNMULSUBS_OP1:
4549 : case MachineCombinerPattern::FNMULSUBD_OP1: {
4550 : // FNMUL I=A,B,0
4551 : // FSUB R,I,C
4552 : // ==> FNMADD R,A,B,C // = -A*B - C
4553 : // --- Create(FNMADD);
4554 4 : if (Pattern == MachineCombinerPattern::FNMULSUBS_OP1) {
4555 : Opc = AArch64::FNMADDSrrr;
4556 : RC = &AArch64::FPR32RegClass;
4557 : } else {
4558 : Opc = AArch64::FNMADDDrrr;
4559 : RC = &AArch64::FPR64RegClass;
4560 : }
4561 4 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC);
4562 4 : break;
4563 : }
4564 :
4565 0 : case MachineCombinerPattern::FMULSUBS_OP2:
4566 : case MachineCombinerPattern::FMULSUBD_OP2: {
4567 : // FMUL I=A,B,0
4568 : // FSUB R,C,I
4569 : // ==> FMSUB R,A,B,C (computes C - A*B)
4570 : // --- Create(FMSUB);
4571 0 : if (Pattern == MachineCombinerPattern::FMULSUBS_OP2) {
4572 : Opc = AArch64::FMSUBSrrr;
4573 : RC = &AArch64::FPR32RegClass;
4574 : } else {
4575 : Opc = AArch64::FMSUBDrrr;
4576 : RC = &AArch64::FPR64RegClass;
4577 : }
4578 0 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC);
4579 0 : break;
4580 : }
4581 :
4582 2 : case MachineCombinerPattern::FMLSv1i32_indexed_OP2:
4583 : Opc = AArch64::FMLSv1i32_indexed;
4584 : RC = &AArch64::FPR32RegClass;
4585 2 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4586 : FMAInstKind::Indexed);
4587 2 : break;
4588 :
4589 2 : case MachineCombinerPattern::FMLSv1i64_indexed_OP2:
4590 : Opc = AArch64::FMLSv1i64_indexed;
4591 : RC = &AArch64::FPR64RegClass;
4592 2 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4593 : FMAInstKind::Indexed);
4594 2 : break;
4595 :
4596 12 : case MachineCombinerPattern::FMLSv2f32_OP2:
4597 : case MachineCombinerPattern::FMLSv2i32_indexed_OP2:
4598 : RC = &AArch64::FPR64RegClass;
4599 12 : if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP2) {
4600 : Opc = AArch64::FMLSv2i32_indexed;
4601 2 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4602 : FMAInstKind::Indexed);
4603 : } else {
4604 : Opc = AArch64::FMLSv2f32;
4605 10 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4606 : FMAInstKind::Accumulator);
4607 : }
4608 : break;
4609 :
4610 12 : case MachineCombinerPattern::FMLSv2f64_OP2:
4611 : case MachineCombinerPattern::FMLSv2i64_indexed_OP2:
4612 : RC = &AArch64::FPR128RegClass;
4613 12 : if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP2) {
4614 : Opc = AArch64::FMLSv2i64_indexed;
4615 2 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4616 : FMAInstKind::Indexed);
4617 : } else {
4618 : Opc = AArch64::FMLSv2f64;
4619 10 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4620 : FMAInstKind::Accumulator);
4621 : }
4622 : break;
4623 :
4624 12 : case MachineCombinerPattern::FMLSv4f32_OP2:
4625 : case MachineCombinerPattern::FMLSv4i32_indexed_OP2:
4626 : RC = &AArch64::FPR128RegClass;
4627 12 : if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP2) {
4628 : Opc = AArch64::FMLSv4i32_indexed;
4629 2 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4630 : FMAInstKind::Indexed);
4631 : } else {
4632 : Opc = AArch64::FMLSv4f32;
4633 10 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 2, Opc, RC,
4634 : FMAInstKind::Accumulator);
4635 : }
4636 : break;
4637 12 : case MachineCombinerPattern::FMLSv2f32_OP1:
4638 : case MachineCombinerPattern::FMLSv2i32_indexed_OP1: {
4639 : RC = &AArch64::FPR64RegClass;
4640 12 : unsigned NewVR = MRI.createVirtualRegister(RC);
4641 : MachineInstrBuilder MIB1 =
4642 24 : BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f32), NewVR)
4643 12 : .add(Root.getOperand(2));
4644 12 : InsInstrs.push_back(MIB1);
4645 12 : InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4646 12 : if (Pattern == MachineCombinerPattern::FMLSv2i32_indexed_OP1) {
4647 : Opc = AArch64::FMLAv2i32_indexed;
4648 0 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4649 : FMAInstKind::Indexed, &NewVR);
4650 : } else {
4651 : Opc = AArch64::FMLAv2f32;
4652 12 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4653 : FMAInstKind::Accumulator, &NewVR);
4654 : }
4655 : break;
4656 : }
4657 12 : case MachineCombinerPattern::FMLSv4f32_OP1:
4658 : case MachineCombinerPattern::FMLSv4i32_indexed_OP1: {
4659 : RC = &AArch64::FPR128RegClass;
4660 12 : unsigned NewVR = MRI.createVirtualRegister(RC);
4661 : MachineInstrBuilder MIB1 =
4662 24 : BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv4f32), NewVR)
4663 12 : .add(Root.getOperand(2));
4664 12 : InsInstrs.push_back(MIB1);
4665 12 : InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4666 12 : if (Pattern == MachineCombinerPattern::FMLSv4i32_indexed_OP1) {
4667 : Opc = AArch64::FMLAv4i32_indexed;
4668 0 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4669 : FMAInstKind::Indexed, &NewVR);
4670 : } else {
4671 : Opc = AArch64::FMLAv4f32;
4672 12 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4673 : FMAInstKind::Accumulator, &NewVR);
4674 : }
4675 : break;
4676 : }
4677 12 : case MachineCombinerPattern::FMLSv2f64_OP1:
4678 : case MachineCombinerPattern::FMLSv2i64_indexed_OP1: {
4679 : RC = &AArch64::FPR128RegClass;
4680 12 : unsigned NewVR = MRI.createVirtualRegister(RC);
4681 : MachineInstrBuilder MIB1 =
4682 24 : BuildMI(MF, Root.getDebugLoc(), TII->get(AArch64::FNEGv2f64), NewVR)
4683 12 : .add(Root.getOperand(2));
4684 12 : InsInstrs.push_back(MIB1);
4685 12 : InstrIdxForVirtReg.insert(std::make_pair(NewVR, 0));
4686 12 : if (Pattern == MachineCombinerPattern::FMLSv2i64_indexed_OP1) {
4687 : Opc = AArch64::FMLAv2i64_indexed;
4688 0 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4689 : FMAInstKind::Indexed, &NewVR);
4690 : } else {
4691 : Opc = AArch64::FMLAv2f64;
4692 12 : MUL = genFusedMultiply(MF, MRI, TII, Root, InsInstrs, 1, Opc, RC,
4693 : FMAInstKind::Accumulator, &NewVR);
4694 : }
4695 : break;
4696 : }
4697 : } // end switch (Pattern)
4698 : // Record MUL and ADD/SUB for deletion
4699 294 : DelInstrs.push_back(MUL);
4700 294 : DelInstrs.push_back(&Root);
4701 : }
4702 :
4703 : /// Replace a csinc-branch sequence with a simple conditional branch.
4704 : ///
4705 : /// Examples:
4706 : /// 1. \code
4707 : /// csinc w9, wzr, wzr, <condition code>
4708 : /// tbnz w9, #0, 0x44
4709 : /// \endcode
4710 : /// to
4711 : /// \code
4712 : /// b.<inverted condition code>
4713 : /// \endcode
4714 : ///
4715 : /// 2. \code
4716 : /// csinc w9, wzr, wzr, <condition code>
4717 : /// tbz w9, #0, 0x44
4718 : /// \endcode
4719 : /// to
4720 : /// \code
4721 : /// b.<condition code>
4722 : /// \endcode
4723 : ///
4724 : /// Replace a compare-and-branch sequence with a TBZ/TBNZ instruction when
4725 : /// the preceding AND's constant operand is a power of 2.
4726 : ///
4727 : /// Examples:
4728 : /// \code
4729 : /// and w8, w8, #0x400
4730 : /// cbnz w8, L1
4731 : /// \endcode
4732 : /// to
4733 : /// \code
4734 : /// tbnz w8, #10, L1
4735 : /// \endcode
4736 : ///
4737 : /// \param MI Conditional Branch
4738 : /// \return True when the simple conditional branch is generated
4739 : ///
4740 973 : bool AArch64InstrInfo::optimizeCondBranch(MachineInstr &MI) const {
4741 : bool IsNegativeBranch = false;
4742 : bool IsTestAndBranch = false;
4743 : unsigned TargetBBInMI = 0;
4744 1946 : switch (MI.getOpcode()) {
4745 0 : default:
4746 0 : llvm_unreachable("Unknown branch instruction?");
4747 : case AArch64::Bcc:
4748 : return false;
4749 : case AArch64::CBZW:
4750 : case AArch64::CBZX:
4751 : TargetBBInMI = 1;
4752 : break;
4753 183 : case AArch64::CBNZW:
4754 : case AArch64::CBNZX:
4755 : TargetBBInMI = 1;
4756 : IsNegativeBranch = true;
4757 183 : break;
4758 80 : case AArch64::TBZW:
4759 : case AArch64::TBZX:
4760 : TargetBBInMI = 2;
4761 : IsTestAndBranch = true;
4762 80 : break;
4763 31 : case AArch64::TBNZW:
4764 : case AArch64::TBNZX:
4765 : TargetBBInMI = 2;
4766 : IsNegativeBranch = true;
4767 : IsTestAndBranch = true;
4768 31 : break;
4769 : }
4770 : // So we increment a zero register and test for bits other
4771 : // than bit 0? Conservatively bail out in case the verifier
4772 : // missed this case.
4773 389 : if (IsTestAndBranch && MI.getOperand(1).getImm())
4774 : return false;
4775 :
4776 : // Find Definition.
4777 : assert(MI.getParent() && "Incomplete machine instruction\n");
4778 350 : MachineBasicBlock *MBB = MI.getParent();
4779 350 : MachineFunction *MF = MBB->getParent();
4780 350 : MachineRegisterInfo *MRI = &MF->getRegInfo();
4781 350 : unsigned VReg = MI.getOperand(0).getReg();
4782 350 : if (!TargetRegisterInfo::isVirtualRegister(VReg))
4783 : return false;
4784 :
4785 350 : MachineInstr *DefMI = MRI->getVRegDef(VReg);
4786 :
4787 : // Look through COPY instructions to find definition.
4788 396 : while (DefMI->isCopy()) {
4789 201 : unsigned CopyVReg = DefMI->getOperand(1).getReg();
4790 201 : if (!MRI->hasOneNonDBGUse(CopyVReg))
4791 : return false;
4792 145 : if (!MRI->hasOneDef(CopyVReg))
4793 : return false;
4794 46 : DefMI = MRI->getVRegDef(CopyVReg);
4795 : }
4796 :
4797 195 : switch (DefMI->getOpcode()) {
4798 : default:
4799 : return false;
4800 : // Fold AND into a TBZ/TBNZ if its constant operand is a power of 2.
4801 7 : case AArch64::ANDWri:
4802 : case AArch64::ANDXri: {
4803 7 : if (IsTestAndBranch)
4804 : return false;
4805 7 : if (DefMI->getParent() != MBB)
4806 : return false;
4807 6 : if (!MRI->hasOneNonDBGUse(VReg))
4808 : return false;
4809 :
4810 5 : bool Is32Bit = (DefMI->getOpcode() == AArch64::ANDWri);
4811 5 : uint64_t Mask = AArch64_AM::decodeLogicalImmediate(
4812 5 : DefMI->getOperand(2).getImm(), Is32Bit ? 32 : 64);
4813 : if (!isPowerOf2_64(Mask))
4814 : return false;
4815 :
4816 : MachineOperand &MO = DefMI->getOperand(1);
4817 5 : unsigned NewReg = MO.getReg();
4818 5 : if (!TargetRegisterInfo::isVirtualRegister(NewReg))
4819 : return false;
4820 :
4821 : assert(!MRI->def_empty(NewReg) && "Register must be defined.");
4822 :
4823 : MachineBasicBlock &RefToMBB = *MBB;
4824 5 : MachineBasicBlock *TBB = MI.getOperand(1).getMBB();
4825 : DebugLoc DL = MI.getDebugLoc();
4826 : unsigned Imm = Log2_64(Mask);
4827 : unsigned Opc = (Imm < 32)
4828 5 : ? (IsNegativeBranch ? AArch64::TBNZW : AArch64::TBZW)
4829 : : (IsNegativeBranch ? AArch64::TBNZX : AArch64::TBZX);
4830 10 : MachineInstr *NewMI = BuildMI(RefToMBB, MI, DL, get(Opc))
4831 5 : .addReg(NewReg)
4832 5 : .addImm(Imm)
4833 5 : .addMBB(TBB);
4834 : // The register lives on in the new TB(N)Z, so clear the kill flag.
4835 : MO.setIsKill(false);
4836 :
4837 : // For immediates smaller than 32, we must use the 32-bit
4838 : // variant (W) in all cases, because the 64-bit variant cannot
4839 : // encode them.
4840 : // Therefore, if the input register is 64-bit, we need to take its
4841 : // 32-bit sub-register.
4842 5 : if (!Is32Bit && Imm < 32)
4843 3 : NewMI->getOperand(0).setSubReg(AArch64::sub_32);
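     :       // Illustrative example (editorial addition, not in the original
     :       // source): for
     :       //   and x9, x8, #0x4
     :       //   cbnz x9, L1
     :       // the mask 0x4 gives Imm = 2 < 32, so the 32-bit TBNZW form is
     :       // chosen and the 64-bit AND input is referenced through its
     :       // sub_32 sub-register, yielding
     :       //   tbnz w8, #2, L1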
4844 5 : MI.eraseFromParent();
4845 : return true;
4846 : }
4847 : // Look for CSINC
4848 1 : case AArch64::CSINCWr:
4849 : case AArch64::CSINCXr: {
4850 2 : if (!(DefMI->getOperand(1).getReg() == AArch64::WZR &&
4851 1 : DefMI->getOperand(2).getReg() == AArch64::WZR) &&
4852 0 : !(DefMI->getOperand(1).getReg() == AArch64::XZR &&
4853 0 : DefMI->getOperand(2).getReg() == AArch64::XZR))
4854 : return false;
4855 :
4856 1 : if (DefMI->findRegisterDefOperandIdx(AArch64::NZCV, true) != -1)
4857 : return false;
4858 :
4859 1 : AArch64CC::CondCode CC = (AArch64CC::CondCode)DefMI->getOperand(3).getImm();
4860 : // Convert only when the condition code is not modified between
4861 : // the CSINC and the branch. The CC may be used by other
4862 : // instructions in between.
4863 1 : if (areCFlagsAccessedBetweenInstrs(DefMI, MI, &getRegisterInfo(), AK_Write))
4864 : return false;
4865 : MachineBasicBlock &RefToMBB = *MBB;
4866 2 : MachineBasicBlock *TBB = MI.getOperand(TargetBBInMI).getMBB();
4867 : DebugLoc DL = MI.getDebugLoc();
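     :     // Editorial note: with both sources being the zero register, CSINC
     :     // produces 0 when <cc> holds and 1 otherwise, so a TBNZ or CBNZ on
     :     // the result fires exactly when <cc> is false. That is why the
     :     // condition is inverted for negative branches below.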
4868 1 : if (IsNegativeBranch)
4869 : CC = AArch64CC::getInvertedCondCode(CC);
4870 2 : BuildMI(RefToMBB, MI, DL, get(AArch64::Bcc)).addImm(CC).addMBB(TBB);
4871 1 : MI.eraseFromParent();
4872 : return true;
4873 : }
4874 : }
4875 : }
4876 :
4877 : std::pair<unsigned, unsigned>
4878 2514 : AArch64InstrInfo::decomposeMachineOperandsTargetFlags(unsigned TF) const {
4879 : const unsigned Mask = AArch64II::MO_FRAGMENT;
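     :   // Illustrative example (editorial addition): a target flag of
     :   // (MO_PAGEOFF | MO_NC) decomposes into the direct fragment MO_PAGEOFF
     :   // and the bitmask remainder MO_NC.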
4880 2514 : return std::make_pair(TF & Mask, TF & ~Mask);
4881 : }
4882 :
4883 : ArrayRef<std::pair<unsigned, const char *>>
4884 2525 : AArch64InstrInfo::getSerializableDirectMachineOperandTargetFlags() const {
4885 : using namespace AArch64II;
4886 :
4887 : static const std::pair<unsigned, const char *> TargetFlags[] = {
4888 : {MO_PAGE, "aarch64-page"}, {MO_PAGEOFF, "aarch64-pageoff"},
4889 : {MO_G3, "aarch64-g3"}, {MO_G2, "aarch64-g2"},
4890 : {MO_G1, "aarch64-g1"}, {MO_G0, "aarch64-g0"},
4891 : {MO_HI12, "aarch64-hi12"}};
4892 2525 : return makeArrayRef(TargetFlags);
4893 : }
4894 :
4895 : ArrayRef<std::pair<unsigned, const char *>>
4896 1277 : AArch64InstrInfo::getSerializableBitmaskMachineOperandTargetFlags() const {
4897 : using namespace AArch64II;
4898 :
4899 : static const std::pair<unsigned, const char *> TargetFlags[] = {
4900 : {MO_COFFSTUB, "aarch64-coffstub"},
4901 : {MO_GOT, "aarch64-got"}, {MO_NC, "aarch64-nc"},
4902 : {MO_TLS, "aarch64-tls"}, {MO_DLLIMPORT, "aarch64-dllimport"}};
4903 1277 : return makeArrayRef(TargetFlags);
4904 : }
4905 :
4906 : ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
4907 42 : AArch64InstrInfo::getSerializableMachineMemOperandTargetFlags() const {
4908 : static const std::pair<MachineMemOperand::Flags, const char *> TargetFlags[] =
4909 : {{MOSuppressPair, "aarch64-suppress-pair"},
4910 : {MOStridedAccess, "aarch64-strided-access"}};
4911 42 : return makeArrayRef(TargetFlags);
4912 : }
4913 :
4914 : /// Constants defining how certain sequences should be outlined.
4915 : /// This encompasses how an outlined function should be called, and what kind of
4916 : /// frame should be emitted for that outlined function.
4917 : ///
4918 : /// \p MachineOutlinerDefault implies that the function should be called with
4919 : /// a save and restore of LR to the stack.
4920 : ///
4921 : /// That is,
4922 : ///
4923 : /// I1     Save LR                    OUTLINED_FUNCTION:
4924 : /// I2 --> BL OUTLINED_FUNCTION       I1
4925 : /// I3     Restore LR                 I2
4926 : ///                                   I3
4927 : ///                                   RET
4928 : ///
4929 : /// * Call construction overhead: 3 (save + BL + restore)
4930 : /// * Frame construction overhead: 1 (ret)
4931 : /// * Requires stack fixups? Yes
4932 : ///
4933 : /// \p MachineOutlinerTailCall implies that the function is being created from
4934 : /// a sequence of instructions ending in a return.
4935 : ///
4936 : /// That is,
4937 : ///
4938 : /// I1                           OUTLINED_FUNCTION:
4939 : /// I2 --> B OUTLINED_FUNCTION   I1
4940 : /// RET                          I2
4941 : ///                              RET
4942 : ///
4943 : /// * Call construction overhead: 1 (B)
4944 : /// * Frame construction overhead: 0 (Return included in sequence)
4945 : /// * Requires stack fixups? No
4946 : ///
4947 : /// \p MachineOutlinerNoLRSave implies that the function should be called using
4948 : /// a BL instruction, but doesn't require LR to be saved and restored. This
4949 : /// happens when LR is known to be dead.
4950 : ///
4951 : /// That is,
4952 : ///
4953 : /// I1                            OUTLINED_FUNCTION:
4954 : /// I2 --> BL OUTLINED_FUNCTION   I1
4955 : /// I3                            I2
4956 : ///                               I3
4957 : ///                               RET
4958 : ///
4959 : /// * Call construction overhead: 1 (BL)
4960 : /// * Frame construction overhead: 1 (RET)
4961 : /// * Requires stack fixups? No
4962 : ///
4963 : /// \p MachineOutlinerThunk implies that the function is being created from
4964 : /// a sequence of instructions ending in a call. The outlined function is
4965 : /// called with a BL instruction, and the outlined function tail-calls the
4966 : /// original call destination.
4967 : ///
4968 : /// That is,
4969 : ///
4970 : /// I1                            OUTLINED_FUNCTION:
4971 : /// I2 --> BL OUTLINED_FUNCTION   I1
4972 : /// BL f                          I2
4973 : ///                               B f
4974 : /// * Call construction overhead: 1 (BL)
4975 : /// * Frame construction overhead: 0
4976 : /// * Requires stack fixups? No
4977 : ///
4978 : /// \p MachineOutlinerRegSave implies that the function should be called with a
4979 : /// save and restore of LR to an available register. This allows us to avoid
4980 : /// stack fixups. Note that this outlining variant is compatible with the
4981 : /// NoLRSave case.
4982 : ///
4983 : /// That is,
4984 : ///
4985 : /// I1     Save LR                    OUTLINED_FUNCTION:
4986 : /// I2 --> BL OUTLINED_FUNCTION       I1
4987 : /// I3     Restore LR                 I2
4988 : ///                                   I3
4989 : ///                                   RET
4990 : ///
4991 : /// * Call construction overhead: 3 (save + BL + restore)
4992 : /// * Frame construction overhead: 1 (ret)
4993 : /// * Requires stack fixups? No
4994 : enum MachineOutlinerClass {
4995 : MachineOutlinerDefault, /// Emit a save, restore, call, and return.
4996 : MachineOutlinerTailCall, /// Only emit a branch.
4997 : MachineOutlinerNoLRSave, /// Emit a call and return.
4998 : MachineOutlinerThunk, /// Emit a call and tail-call.
4999 : MachineOutlinerRegSave /// Same as default, but save to a register.
5000 : };
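     : // Editorial sketch (assumed shape; see insertOutlinedCall below): the
     : // MachineOutlinerRegSave call sequence is of the form
     : //   orr xN, xzr, lr        // mov xN, lr  -- save LR in a free register
     : //   bl  OUTLINED_FUNCTION_M
     : //   orr lr, xzr, xN        // mov lr, xN  -- restore LR
     : // i.e. three 4-byte instructions, matching the 12-byte call overhead
     : // used in getOutliningCandidateInfo. xN and the function name are
     : // placeholders.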
5001 :
5002 : enum MachineOutlinerMBBFlags {
5003 : LRUnavailableSomewhere = 0x2,
5004 : HasCalls = 0x4
5005 : };
5006 :
5007 : unsigned
5008 254 : AArch64InstrInfo::findRegisterToSaveLRTo(const outliner::Candidate &C) const {
5009 254 : MachineFunction *MF = C.getMF();
5010 : const AArch64RegisterInfo *ARI = static_cast<const AArch64RegisterInfo *>(
5011 254 : MF->getSubtarget().getRegisterInfo());
5012 :
5013 : // Check if there is an available register across the sequence that we can
5014 : // use.
5015 448 : for (unsigned Reg : AArch64::GPR64RegClass) {
5016 443 : if (!ARI->isReservedReg(*MF, Reg) &&
5017 856 : Reg != AArch64::LR && // LR is not reserved, but don't use it.
5018 428 : Reg != AArch64::X16 && // X16 is not guaranteed to be preserved.
5019 413 : Reg != AArch64::X17 && // Ditto for X17.
5020 1115 : C.LRU.available(Reg) && C.UsedInSequence.available(Reg))
5021 249 : return Reg;
5022 : }
5023 :
5024 : // No suitable register. Return 0.
5025 : return 0u;
5026 : }
5027 :
5028 : outliner::OutlinedFunction
5029 182 : AArch64InstrInfo::getOutliningCandidateInfo(
5030 : std::vector<outliner::Candidate> &RepeatedSequenceLocs) const {
5031 182 : unsigned SequenceSize = std::accumulate(
5032 182 : RepeatedSequenceLocs[0].front(),
5033 182 : std::next(RepeatedSequenceLocs[0].back()),
5034 : 0, [this](unsigned Sum, const MachineInstr &MI) {
5035 880 : return Sum + getInstSizeInBytes(MI);
5036 182 : });
5037 :
5038 : // Compute liveness information for each candidate.
5039 182 : const TargetRegisterInfo &TRI = getRegisterInfo();
5040 : std::for_each(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
5041 451 : [&TRI](outliner::Candidate &C) { C.initLRU(TRI); });
5042 :
5043 : // According to the AArch64 Procedure Call Standard, the following are
5044 : // undefined on entry/exit from a function call:
5045 : //
5046 : // * Registers x16, x17, (and thus w16, w17)
5047 : // * Condition codes (and thus the NZCV register)
5048 : //
5049 : // Because of this, we can't outline any sequence of instructions where
5050 : // one of these registers is live into/across it. Thus, we need to
5051 : // delete those candidates.
5054 : auto CantGuaranteeValueAcrossCall = [](outliner::Candidate &C) {
5055 : LiveRegUnits LRU = C.LRU;
5056 : return (!LRU.available(AArch64::W16) || !LRU.available(AArch64::W17) ||
5057 : !LRU.available(AArch64::NZCV));
5058 : };
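     :   // Editorial note: checking the W16/W17 register units also covers
     :   // X16/X17, since LiveRegUnits tracks the units shared between the
     :   // 32-bit and 64-bit views of each GPR.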
5059 :
5060 : // Erase every candidate that violates the restrictions above. (It could be
5061 : // true that we still have viable candidates, so it's not worth bailing out
5062 : // in the case that, say, 1 out of 20 candidates violates the restrictions.)
5063 : RepeatedSequenceLocs.erase(std::remove_if(RepeatedSequenceLocs.begin(),
5064 : RepeatedSequenceLocs.end(),
5065 182 : CantGuaranteeValueAcrossCall),
5066 : RepeatedSequenceLocs.end());
5067 :
5068 : // If the sequence is empty, we're done.
5069 182 : if (RepeatedSequenceLocs.empty())
5070 : return outliner::OutlinedFunction();
5071 :
5072 : // At this point, we have only "safe" candidates to outline. Figure out
5073 : // frame + call instruction information.
5074 :
5075 177 : unsigned LastInstrOpcode = RepeatedSequenceLocs[0].back()->getOpcode();
5076 :
5077 : // Helper lambda which sets call information for every candidate.
5078 : auto SetCandidateCallInfo =
5079 : [&RepeatedSequenceLocs](unsigned CallID, unsigned NumBytesForCall) {
5080 606 : for (outliner::Candidate &C : RepeatedSequenceLocs)
5081 : C.setCallInfo(CallID, NumBytesForCall);
5082 : };
5083 :
5084 : unsigned FrameID = MachineOutlinerDefault;
5085 : unsigned NumBytesToCreateFrame = 4;
5086 :
5087 : bool HasBTI =
5088 : std::any_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
5089 : [](outliner::Candidate &C) {
5090 0 : return C.getMF()->getFunction().hasFnAttribute(
5091 : "branch-target-enforcement");
5092 : });
5093 :
5094 : // If the last instruction in any candidate is a terminator, then we should
5095 : // tail call all of the candidates.
5096 177 : if (RepeatedSequenceLocs[0].back()->isTerminator()) {
5097 : FrameID = MachineOutlinerTailCall;
5098 : NumBytesToCreateFrame = 0;
5099 : SetCandidateCallInfo(MachineOutlinerTailCall, 4);
5100 : }
5101 :
5102 142 : else if (LastInstrOpcode == AArch64::BL ||
5103 125 : (LastInstrOpcode == AArch64::BLR && !HasBTI)) {
5104 : // FIXME: Do we need to check if the code after this uses the value of LR?
5105 : FrameID = MachineOutlinerThunk;
5106 : NumBytesToCreateFrame = 0;
5107 : SetCandidateCallInfo(MachineOutlinerThunk, 4);
5108 : }
5109 :
5110 : // Make sure that LR isn't live on entry to this candidate. The only
5111 : // instructions that use LR that could possibly appear in a repeated sequence
5112 : // are calls. Therefore, we only have to check and see if LR is dead on entry
5113 : // to (or exit from) some candidate.
5114 120 : else if (std::all_of(RepeatedSequenceLocs.begin(),
5115 : RepeatedSequenceLocs.end(),
5116 : [](outliner::Candidate &C) {
5117 0 : return C.LRU.available(AArch64::LR);
5118 : })) {
5119 : FrameID = MachineOutlinerNoLRSave;
5120 : NumBytesToCreateFrame = 4;
5121 : SetCandidateCallInfo(MachineOutlinerNoLRSave, 4);
5122 : }
5123 :
5124 : // LR is live, so we need to save it. Decide whether it should be saved to
5125 : // the stack, or if it can be saved to a register.
5126 : else {
5127 103 : if (std::all_of(RepeatedSequenceLocs.begin(), RepeatedSequenceLocs.end(),
5128 : [this](outliner::Candidate &C) {
5129 0 : return findRegisterToSaveLRTo(C);
5130 : })) {
5131 : // Every candidate has an available callee-saved register for the save.
5132 : // We can save LR to a register.
5133 : FrameID = MachineOutlinerRegSave;
5134 : NumBytesToCreateFrame = 4;
5135 : SetCandidateCallInfo(MachineOutlinerRegSave, 12);
5136 : }
5137 :
5138 : else {
5139 : // At least one candidate does not have an available callee-saved
5140 : // register. We must save LR to the stack.
5141 : FrameID = MachineOutlinerDefault;
5142 : NumBytesToCreateFrame = 4;
5143 : SetCandidateCallInfo(MachineOutlinerDefault, 12);
5144 : }
5145 : }
5146 :
5147 : // Check if the range contains a call. These require a save + restore of the
5148 : // link register.
5149 177 : if (std::any_of(RepeatedSequenceLocs[0].front(),
5150 177 : RepeatedSequenceLocs[0].back(),
5151 : [](const MachineInstr &MI) { return MI.isCall(); }))
5152 7 : NumBytesToCreateFrame += 8; // Save + restore the link register.
5153 :
5154 : // Handle the last instruction separately. If this is a tail call, then the
5155 : // last instruction is a call. We don't want to save + restore in this case.
5156 : // However, it is possible that the last instruction is a call without it
5157 : // being valid to tail-call this sequence. We should consider this as well.
5158 340 : else if (FrameID != MachineOutlinerThunk &&
5159 283 : FrameID != MachineOutlinerTailCall &&
5160 113 : RepeatedSequenceLocs[0].back()->isCall())
5161 1 : NumBytesToCreateFrame += 8;
5162 :
5163 : return outliner::OutlinedFunction(RepeatedSequenceLocs, SequenceSize,
5164 177 : NumBytesToCreateFrame, FrameID);
5165 : }
5166 :
5167 117 : bool AArch64InstrInfo::isFunctionSafeToOutlineFrom(
5168 : MachineFunction &MF, bool OutlineFromLinkOnceODRs) const {
5169 117 : const Function &F = MF.getFunction();
5170 :
5171 : // Can F be deduplicated by the linker? If it can, don't outline from it.
5172 117 : if (!OutlineFromLinkOnceODRs && F.hasLinkOnceODRLinkage())
5173 : return false;
5174 :
5175 : // Don't outline from functions with section markings; the program could
5176 : // expect that all the code is in the named section.
5177 : // FIXME: Allow outlining from multiple functions with the same section
5178 : // marking.
5179 115 : if (F.hasSection())
5180 : return false;
5181 :
5182 : // Outlining from functions with redzones is unsafe since the outliner may
5183 : // modify the stack. Check if hasRedZone is true or unknown; if yes, don't
5184 : // outline from it.
5185 112 : AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
5186 336 : if (!AFI || AFI->hasRedZone().getValueOr(true))
5187 1 : return false;
5188 :
5189 : // It's safe to outline from MF.
5190 : return true;
5191 : }
5192 :
5193 : unsigned
5194 129 : AArch64InstrInfo::getMachineOutlinerMBBFlags(MachineBasicBlock &MBB) const {
5195 : unsigned Flags = 0x0;
5196 : // Check if there's a call inside this MachineBasicBlock. If there is, then
5197 : // set a flag.
5198 129 : if (std::any_of(MBB.begin(), MBB.end(),
5199 : [](MachineInstr &MI) { return MI.isCall(); }))
5200 : Flags |= MachineOutlinerMBBFlags::HasCalls;
5201 :
5202 : // Check if LR is available through all of the MBB. If it's not, then set
5203 : // a flag.
5204 : assert(MBB.getParent()->getRegInfo().tracksLiveness() &&
5205 : "Suitable Machine Function for outlining must track liveness");
5206 129 : LiveRegUnits LRU(getRegisterInfo());
5207 129 : LRU.addLiveOuts(MBB);
5208 :
5209 : std::for_each(MBB.rbegin(),
5210 : MBB.rend(),
5211 1418 : [&LRU](MachineInstr &MI) { LRU.accumulate(MI); });
5212 :
5213 129 : if (!LRU.available(AArch64::LR))
5214 113 : Flags |= MachineOutlinerMBBFlags::LRUnavailableSomewhere;
5215 :
5216 129 : return Flags;
5217 : }
5218 :
5219 : outliner::InstrType
5220 1289 : AArch64InstrInfo::getOutliningType(MachineBasicBlock::iterator &MIT,
5221 : unsigned Flags) const {
5222 : MachineInstr &MI = *MIT;
5223 1289 : MachineBasicBlock *MBB = MI.getParent();
5224 1289 : MachineFunction *MF = MBB->getParent();
5225 1289 : AArch64FunctionInfo *FuncInfo = MF->getInfo<AArch64FunctionInfo>();
5226 :
5227 : // Don't outline LOHs.
5228 1289 : if (FuncInfo->getLOHRelated().count(&MI))
5229 : return outliner::InstrType::Illegal;
5230 :
5231 : // Don't allow debug values to impact outlining type.
5232 : if (MI.isDebugInstr() || MI.isIndirectDebugValue())
5233 : return outliner::InstrType::Invisible;
5234 :
5235 : // At this point, KILL instructions don't really tell us much so we can go
5236 : // ahead and skip over them.
5237 1283 : if (MI.isKill())
5238 : return outliner::InstrType::Invisible;
5239 :
5240 : // Is this a terminator for a basic block?
5241 1282 : if (MI.isTerminator()) {
5242 :
5243 : // Is this the end of a function?
5244 216 : if (MI.getParent()->succ_empty())
5245 : return outliner::InstrType::Legal;
5246 :
5247 : // It's not, so don't outline it.
5248 7 : return outliner::InstrType::Illegal;
5249 : }
5250 :
5251 : // Make sure none of the operands are un-outlinable.
5252 4917 : for (const MachineOperand &MOP : MI.operands()) {
5253 3837 : if (MOP.isCPI() || MOP.isJTI() || MOP.isCFIIndex() || MOP.isFI() ||
5254 : MOP.isTargetIndex())
5255 : return outliner::InstrType::Illegal;
5256 :
5257 : // If it uses LR or W30 explicitly, then don't touch it.
5258 3776 : if (MOP.isReg() && !MOP.isImplicit() &&
5259 2402 : (MOP.getReg() == AArch64::LR || MOP.getReg() == AArch64::W30))
5260 : return outliner::InstrType::Illegal;
5261 : }
5262 :
5263 : // Special cases for instructions that can always be outlined, but will fail
5264 : // the later tests, e.g. ADRPs: they are PC-relative, but can always be
5265 : // outlined because they don't require a *specific* value to be in LR.
5266 2160 : if (MI.getOpcode() == AArch64::ADRP)
5267 : return outliner::InstrType::Legal;
5268 :
5269 : // If MI is a call we might be able to outline it. We don't want to outline
5270 : // any calls that rely on the position of items on the stack. When we outline
5271 : // something containing a call, we have to emit a save and restore of LR in
5272 : // the outlined function. Currently, this always happens by saving LR to the
5273 : // stack. Thus, if we outline, say, half the parameters for a function call
5274 : // plus the call, then we'll break the callee's expectations for the layout
5275 : // of the stack.
5276 : //
5277 : // FIXME: Allow calls to functions which construct a stack frame, as long
5278 : // as they don't access arguments on the stack.
5279 : // FIXME: Figure out some way to analyze functions defined in other modules.
5280 : // We should be able to compute the memory usage based on the IR calling
5281 : // convention, even if we can't see the definition.
5282 1022 : if (MI.isCall()) {
5283 : // Get the function associated with the call. Look at each operand and find
5284 : // the one that represents the callee and get its name.
5285 : const Function *Callee = nullptr;
5286 56 : for (const MachineOperand &MOP : MI.operands()) {
5287 51 : if (MOP.isGlobal()) {
5288 16 : Callee = dyn_cast<Function>(MOP.getGlobal());
5289 : break;
5290 : }
5291 : }
5292 :
5293 : // Never outline calls to mcount. There isn't any rule that would require
5294 : // this, but the Linux kernel's "ftrace" feature depends on it.
5295 21 : if (Callee && Callee->getName() == "\01_mcount")
5296 : return outliner::InstrType::Illegal;
5297 :
5298 : // If we don't know anything about the callee, assume it depends on the
5299 : // stack layout of the caller. In that case, it's only legal to outline
5300 : // as a tail-call. Whitelist the call instructions we know about so we
5301 : // don't get unexpected results with call pseudo-instructions.
5302 : auto UnknownCallOutlineType = outliner::InstrType::Illegal;
5303 42 : if (MI.getOpcode() == AArch64::BLR || MI.getOpcode() == AArch64::BL)
5304 : UnknownCallOutlineType = outliner::InstrType::LegalTerminator;
5305 :
5306 21 : if (!Callee)
5307 : return UnknownCallOutlineType;
5308 :
5309 : // We have a function we have information about. Check if it's something
5310 : // we can safely outline.
5311 16 : MachineFunction *CalleeMF = MF->getMMI().getMachineFunction(*Callee);
5312 :
5313 : // We don't know what's going on with the callee at all. Don't touch it.
5314 16 : if (!CalleeMF)
5315 : return UnknownCallOutlineType;
5316 :
5317 : // Check if we know anything about the callee saves on the function. If we
5318 : // don't, then don't touch it, since that implies that we haven't
5319 : // computed anything about its stack frame yet.
5320 10 : MachineFrameInfo &MFI = CalleeMF->getFrameInfo();
5321 10 : if (!MFI.isCalleeSavedInfoValid() || MFI.getStackSize() > 0 ||
5322 : MFI.getNumObjects() > 0)
5323 2 : return UnknownCallOutlineType;
5324 :
5325 : // At this point, we can say that CalleeMF ought to not pass anything on the
5326 : // stack. Therefore, we can outline it.
5327 : return outliner::InstrType::Legal;
5328 : }
5329 :
5330 : // Don't outline positions.
5331 : if (MI.isPosition())
5332 : return outliner::InstrType::Illegal;
5333 :
5334 : // Don't touch the link register or W30.
5335 2002 : if (MI.readsRegister(AArch64::W30, &getRegisterInfo()) ||
5336 : MI.modifiesRegister(AArch64::W30, &getRegisterInfo()))
5337 0 : return outliner::InstrType::Illegal;
5338 :
5339 : // Does this use the stack?
5340 1951 : if (MI.modifiesRegister(AArch64::SP, &RI) ||
5341 : MI.readsRegister(AArch64::SP, &RI)) {
5342 : // True if there is no chance that any outlined candidate from this range
5343 : // could require stack fixups. That is, both
5344 : // * LR is available in the range (No save/restore around call)
5345 : // * The range doesn't include calls (No save/restore in outlined frame)
5346 : // are true.
5347 : // FIXME: This is very restrictive; the flags check the whole block,
5348 : // not just the bit we will try to outline.
5349 : bool MightNeedStackFixUp =
5350 157 : (Flags & (MachineOutlinerMBBFlags::LRUnavailableSomewhere |
5351 : MachineOutlinerMBBFlags::HasCalls));
5352 :
5353 : // If this instruction is in a range where it *never* needs to be fixed
5354 : // up, then we can *always* outline it. This is true even if it's not
5355 : // possible to fix that instruction up.
5356 : //
5357 : // Why? Consider two equivalent instructions I1, I2 where both I1 and I2
5358 : // use SP. Suppose that I1 sits within a range that definitely doesn't
5359 : // need stack fixups, while I2 sits in a range that does.
5360 : //
5361 : // First, I1 can be outlined as long as we *never* fix up the stack in
5362 : // any sequence containing it. I1 is already a safe instruction in the
5363 : // original program, so as long as we don't modify it we're good to go.
5364 : // So this leaves us with showing that outlining I2 won't break our
5365 : // program.
5366 : //
5367 : // Suppose I1 and I2 belong to equivalent candidate sequences. When we
5368 : // look at I2, we need to see if it can be fixed up. Suppose I2, (and
5369 : // thus I1) cannot be fixed up. Then I2 will be assigned an unique
5370 : // integer label; thus, I2 cannot belong to any candidate sequence (a
5371 : // contradiction). Suppose I2 can be fixed up. Then I1 can be fixed up
5372 : // as well, so we're good. Thus, I1 is always safe to outline.
5373 : //
5374 : // This gives us two things: first off, it buys us some more instructions
5375 : // for our search space by deeming stack instructions illegal only when
5376 : // they can't be fixed up AND we might have to fix them up. Second off,
5377 : // This allows us to catch tricky instructions like, say,
5378 : // %xi = ADDXri %sp, n, 0. We can't safely outline these since they might
5379 : // be paired with later SUBXris, which might *not* end up being outlined.
5380 : // If we mess with the stack to save something, then an ADDXri messes with
5381 : // it *after*, then we aren't going to restore the right something from
5382 : // the stack if we don't outline the corresponding SUBXri first. ADDXris and
5383 : // SUBXris are extremely common in prologue/epilogue code, so supporting
5384 : // them in the outliner can be a pretty big win!
5385 157 : if (!MightNeedStackFixUp)
5386 : return outliner::InstrType::Legal;
5387 :
5388 : // Any modification of SP will break our code to save/restore LR.
5389 : // FIXME: We could handle some instructions which add a constant offset to
5390 : // SP, with a bit more work.
5391 155 : if (MI.modifiesRegister(AArch64::SP, &RI))
5392 : return outliner::InstrType::Illegal;
5393 :
5394 : // At this point, we have a stack instruction that we might need to fix
5395 : // up. We'll handle it if it's a load or store.
5396 104 : if (MI.mayLoadOrStore()) {
5397 : unsigned Base; // Filled with the base regiser of MI.
5398 : int64_t Offset; // Filled with the offset of MI.
5399 : unsigned DummyWidth;
5400 :
5401 : // Does it allow us to offset the base register and is the base SP?
5402 97 : if (!getMemOpBaseRegImmOfsWidth(MI, Base, Offset, DummyWidth, &RI) ||
5403 97 : Base != AArch64::SP)
5404 : return outliner::InstrType::Illegal;
5405 :
5406 : // Find the minimum/maximum offset for this instruction and check if
5407 : // fixing it up would be in range.
5408 : int64_t MinOffset, MaxOffset; // Unscaled offsets for the instruction.
5409 : unsigned Scale; // The scale to multiply the offsets by.
5410 194 : getMemOpInfo(MI.getOpcode(), Scale, DummyWidth, MinOffset, MaxOffset);
5411 :
5412 : // TODO: We should really test what happens if an instruction overflows.
5413 : // This is tricky to test with IR tests, but when the outliner is moved
5414 : // to a MIR test, it really ought to be checked.
5415 97 : Offset += 16; // Update the offset to what it would be if we outlined.
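     :       // Editorial example (assumed encoding limits): for an LDRXui,
     :       // Scale is 8 and the immediate range is [0, 4095], so the access
     :       // remains outlinable only if the updated byte offset still lies
     :       // in [0, 32760].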
5416 97 : if (Offset < MinOffset * Scale || Offset > MaxOffset * Scale)
5417 : return outliner::InstrType::Illegal;
5418 :
5419 : // It's in range, so we can outline it.
5420 97 : return outliner::InstrType::Legal;
5421 : }
5422 :
5423 : // FIXME: Add handling for instructions like "add x0, sp, #8".
5424 :
5425 : // We can't fix it up, so don't outline it.
5426 : return outliner::InstrType::Illegal;
5427 : }
5428 :
5429 : return outliner::InstrType::Legal;
5430 : }
5431 :
5432 3 : void AArch64InstrInfo::fixupPostOutline(MachineBasicBlock &MBB) const {
5433 21 : for (MachineInstr &MI : MBB) {
5434 : unsigned Base, Width;
5435 : int64_t Offset;
5436 :
5437 : // Is this a load or store with an immediate offset with SP as the base?
5438 18 : if (!MI.mayLoadOrStore() ||
5439 18 : !getMemOpBaseRegImmOfsWidth(MI, Base, Offset, Width, &RI) ||
5440 0 : Base != AArch64::SP)
5441 18 : continue;
5442 :
5443 : // It is, so we have to fix it up.
5444 : unsigned Scale;
5445 : int64_t Dummy1, Dummy2;
5446 :
5447 0 : MachineOperand &StackOffsetOperand = getMemOpBaseRegImmOfsOffsetOperand(MI);
5448 : assert(StackOffsetOperand.isImm() && "Stack offset wasn't immediate!");
5449 0 : getMemOpInfo(MI.getOpcode(), Scale, Width, Dummy1, Dummy2);
5450 : assert(Scale != 0 && "Unexpected opcode!");
5451 :
5452 : // We've pushed the return address to the stack, so add 16 to the offset.
5453 : // This is safe, since we already checked if it would overflow when we
5454 : // checked if this instruction was legal to outline.
5455 0 : int64_t NewImm = (Offset + 16) / Scale;
5456 : StackOffsetOperand.setImm(NewImm);
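     :     // Editorial illustration: with the 16-byte LR spill added by the
     :     // outlined frame, an instruction copied into the outlined function
     :     // such as "ldr x0, [sp, #8]" is rewritten here to "ldr x0, [sp, #24]".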
5457 : }
5458 3 : }
5459 :
5460 26 : void AArch64InstrInfo::buildOutlinedFrame(
5461 : MachineBasicBlock &MBB, MachineFunction &MF,
5462 : const outliner::OutlinedFunction &OF) const {
5463 : // For thunk outlining, rewrite the last instruction from a call to a
5464 : // tail-call.
5465 26 : if (OF.FrameConstructionID == MachineOutlinerThunk) {
5466 : MachineInstr *Call = &*--MBB.instr_end();
5467 : unsigned TailOpcode;
5468 10 : if (Call->getOpcode() == AArch64::BL) {
5469 : TailOpcode = AArch64::TCRETURNdi;
5470 : } else {
5471 : assert(Call->getOpcode() == AArch64::BLR);
5472 : TailOpcode = AArch64::TCRETURNriALL;
5473 : }
5474 15 : MachineInstr *TC = BuildMI(MF, DebugLoc(), get(TailOpcode))
5475 5 : .add(Call->getOperand(0))
5476 : .addImm(0);
5477 : MBB.insert(MBB.end(), TC);
5478 5 : Call->eraseFromParent();
5479 : }
5480 :
5481 : // Is there a call in the outlined range?
5482 : auto IsNonTailCall = [](MachineInstr &MI) {
5483 : return MI.isCall() && !MI.isReturn();
5484 : };
5485 26 : if (std::any_of(MBB.instr_begin(), MBB.instr_end(), IsNonTailCall)) {
5486 : // Fix up the instructions in the range, since we're going to modify the
5487 : // stack.
5488 : assert(OF.FrameConstructionID != MachineOutlinerDefault &&
5489 : "Can only fix up stack references once");
5490 2 : fixupPostOutline(MBB);
5491 :
5492 : // LR has to be a live-in so that we can save it.
5493 : MBB.addLiveIn(AArch64::LR);
5494 :
5495 : MachineBasicBlock::iterator It = MBB.begin();
5496 : MachineBasicBlock::iterator Et = MBB.end();
5497 :
5498 2 : if (OF.FrameConstructionID == MachineOutlinerTailCall ||
5499 : OF.FrameConstructionID == MachineOutlinerThunk)
5500 0 : Et = std::prev(MBB.end());
5501 :
5502 : // Insert a save before the outlined region
5503 6 : MachineInstr *STRXpre = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
5504 2 : .addReg(AArch64::SP, RegState::Define)
5505 2 : .addReg(AArch64::LR)
5506 2 : .addReg(AArch64::SP)
5507 2 : .addImm(-16);
5508 : It = MBB.insert(It, STRXpre);
5509 :
5510 2 : const TargetSubtargetInfo &STI = MF.getSubtarget();
5511 2 : const MCRegisterInfo *MRI = STI.getRegisterInfo();
5512 2 : unsigned DwarfReg = MRI->getDwarfRegNum(AArch64::LR, true);
5513 :
5514 : // Add a CFI saying the stack was moved 16 B down.
5515 : int64_t StackPosEntry =
5516 2 : MF.addFrameInst(MCCFIInstruction::createDefCfaOffset(nullptr, 16));
5517 4 : BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
5518 : .addCFIIndex(StackPosEntry)
5519 : .setMIFlags(MachineInstr::FrameSetup);
5520 :
5521 : // Add a CFI saying that the LR that we want to find is now 16 B higher than
5522 : // before.
5523 : int64_t LRPosEntry =
5524 2 : MF.addFrameInst(MCCFIInstruction::createOffset(nullptr, DwarfReg, 16));
5525 4 : BuildMI(MBB, It, DebugLoc(), get(AArch64::CFI_INSTRUCTION))
5526 : .addCFIIndex(LRPosEntry)
5527 : .setMIFlags(MachineInstr::FrameSetup);
5528 :
5529 : // Insert a restore before the terminator for the function.
5530 6 : MachineInstr *LDRXpost = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
5531 2 : .addReg(AArch64::SP, RegState::Define)
5532 2 : .addReg(AArch64::LR, RegState::Define)
5533 2 : .addReg(AArch64::SP)
5534 2 : .addImm(16);
5535 : Et = MBB.insert(Et, LDRXpost);
5536 : }
5537 :
5538 : // If this is a tail call outlined function, then there's already a return.
5539 26 : if (OF.FrameConstructionID == MachineOutlinerTailCall ||
5540 : OF.FrameConstructionID == MachineOutlinerThunk)
5541 : return;
5542 :
5543 : // It's not a tail call, so we have to insert the return ourselves.
5544 42 : MachineInstr *ret = BuildMI(MF, DebugLoc(), get(AArch64::RET))
5545 14 : .addReg(AArch64::LR, RegState::Undef);
5546 : MBB.insert(MBB.end(), ret);
5547 :
5548 : // Did we have to modify the stack by saving the link register?
5549 14 : if (OF.FrameConstructionID != MachineOutlinerDefault)
5550 : return;
5551 :
5552 : // We modified the stack.
5553 : // Walk over the basic block and fix up all the stack accesses.
5554 1 : fixupPostOutline(MBB);
5555 : }
5556 :
5557 75 : MachineBasicBlock::iterator AArch64InstrInfo::insertOutlinedCall(
5558 : Module &M, MachineBasicBlock &MBB, MachineBasicBlock::iterator &It,
5559 : MachineFunction &MF, const outliner::Candidate &C) const {
5560 :
5561 : // Are we tail calling?
5562 75 : if (C.CallConstructionID == MachineOutlinerTailCall) {
5563 : // If yes, then we can just branch to the label.
5564 52 : It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::TCRETURNdi))
5565 26 : .addGlobalAddress(M.getNamedValue(MF.getName()))
5566 26 : .addImm(0));
5567 26 : return It;
5568 : }
5569 :
5570 : // Are we saving the link register?
5571 49 : if (C.CallConstructionID == MachineOutlinerNoLRSave ||
5572 : C.CallConstructionID == MachineOutlinerThunk) {
5573 : // No, so just insert the call.
5574 48 : It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
5575 48 : .addGlobalAddress(M.getNamedValue(MF.getName())));
5576 24 : return It;
5577 : }
5578 :
5579 : // We want to return the spot where we inserted the call.
5580 : MachineBasicBlock::iterator CallPt;
5581 :
5582 : // Instructions for saving and restoring LR around the call instruction we're
5583 : // going to insert.
5584 : MachineInstr *Save;
5585 : MachineInstr *Restore;
5586 : // Can we save to a register?
5587 25 : if (C.CallConstructionID == MachineOutlinerRegSave) {
5588 : // FIXME: This logic should be sunk into a target-specific interface so that
5589 : // we don't have to recompute the register.
5590 22 : unsigned Reg = findRegisterToSaveLRTo(C);
5591 : assert(Reg != 0 && "No callee-saved register available?");
5592 :
5593 : // Save and restore LR from that register.
5594 44 : Save = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), Reg)
5595 22 : .addReg(AArch64::XZR)
5596 22 : .addReg(AArch64::LR)
5597 22 : .addImm(0);
5598 66 : Restore = BuildMI(MF, DebugLoc(), get(AArch64::ORRXrs), AArch64::LR)
5599 22 : .addReg(AArch64::XZR)
5600 22 : .addReg(Reg)
5601 22 : .addImm(0);
5602 : } else {
5603 : // We have the default case. Save and restore from SP.
5604 6 : Save = BuildMI(MF, DebugLoc(), get(AArch64::STRXpre))
5605 3 : .addReg(AArch64::SP, RegState::Define)
5606 3 : .addReg(AArch64::LR)
5607 3 : .addReg(AArch64::SP)
5608 3 : .addImm(-16);
5609 9 : Restore = BuildMI(MF, DebugLoc(), get(AArch64::LDRXpost))
5610 3 : .addReg(AArch64::SP, RegState::Define)
5611 3 : .addReg(AArch64::LR, RegState::Define)
5612 3 : .addReg(AArch64::SP)
5613 3 : .addImm(16);
5614 : }
5615 :
5616 : It = MBB.insert(It, Save);
5617 : It++;
5618 :
5619 : // Insert the call.
5620 50 : It = MBB.insert(It, BuildMI(MF, DebugLoc(), get(AArch64::BL))
5621 50 : .addGlobalAddress(M.getNamedValue(MF.getName())));
5622 25 : CallPt = It;
5623 : It++;
5624 :
5625 25 : It = MBB.insert(It, Restore);
5626 25 : return CallPt;
5627 : }
5628 :
5629 14017 : bool AArch64InstrInfo::shouldOutlineFromFunctionByDefault(
5630 : MachineFunction &MF) const {
5631 14017 : return MF.getFunction().optForMinSize();
5632 : }
|