1//===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass that expands pseudo instructions into target
10// instructions to allow proper scheduling and other late optimizations. This
11// pass should be run after register allocation but before the post-regalloc
12// scheduling pass.
13//
14//===----------------------------------------------------------------------===//
15
16#include "AArch64ExpandImm.h"
17#include "AArch64InstrInfo.h"
19#include "AArch64Subtarget.h"
31#include "llvm/IR/DebugLoc.h"
32#include "llvm/MC/MCInstrDesc.h"
33#include "llvm/Pass.h"
37#include <cassert>
38#include <cstdint>
39#include <iterator>
40
41using namespace llvm;
42
43#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"
44
45namespace {
46
47class AArch64ExpandPseudo : public MachineFunctionPass {
48public:
49 const AArch64InstrInfo *TII;
50
51 static char ID;
52
53 AArch64ExpandPseudo() : MachineFunctionPass(ID) {
54 initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
55 }
56
57 bool runOnMachineFunction(MachineFunction &Fn) override;
58
59 StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }
60
61private:
62 bool expandMBB(MachineBasicBlock &MBB);
63 bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
64 MachineBasicBlock::iterator &NextMBBI);
65 bool expandMultiVecPseudo(MachineBasicBlock &MBB,
66 MachineBasicBlock::iterator MBBI,
67 TargetRegisterClass ContiguousClass,
68 TargetRegisterClass StridedClass,
69 unsigned ContiguousOpc, unsigned StridedOpc);
70 bool expandFormTuplePseudo(MachineBasicBlock &MBB,
71 MachineBasicBlock::iterator MBBI,
72 MachineBasicBlock::iterator &NextMBBI,
73 unsigned Size);
74 bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
75 unsigned BitSize);
76
77 bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
78 MachineBasicBlock::iterator MBBI);
79 bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
80 unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
81 unsigned ExtendImm, unsigned ZeroReg,
82 MachineBasicBlock::iterator &NextMBBI);
83 bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
84 MachineBasicBlock::iterator MBBI,
85 MachineBasicBlock::iterator &NextMBBI);
86 bool expandSetTagLoop(MachineBasicBlock &MBB,
87 MachineBasicBlock::iterator MBBI,
88 MachineBasicBlock::iterator &NextMBBI);
89 bool expandSVESpillFill(MachineBasicBlock &MBB,
90 MachineBasicBlock::iterator MBBI, unsigned Opc,
91 unsigned N);
92 bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
93 MachineBasicBlock::iterator MBBI);
94 bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
95 bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
96 MachineBasicBlock::iterator MBBI);
97 MachineBasicBlock *expandRestoreZA(MachineBasicBlock &MBB,
98 MachineBasicBlock::iterator MBBI);
99 MachineBasicBlock *expandCondSMToggle(MachineBasicBlock &MBB,
100 MachineBasicBlock::iterator MBBI);
101};
102
103} // end anonymous namespace
104
105char AArch64ExpandPseudo::ID = 0;
106
107INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
108 AARCH64_EXPAND_PSEUDO_NAME, false, false)
109
110/// Transfer implicit operands on the pseudo instruction to the
111/// instructions created from the expansion.
112static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
113 MachineInstrBuilder &DefMI) {
114 const MCInstrDesc &Desc = OldMI.getDesc();
115 for (const MachineOperand &MO :
116 llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) {
117 assert(MO.isReg() && MO.getReg());
118 if (MO.isUse())
119 UseMI.add(MO);
120 else
121 DefMI.add(MO);
122 }
123}
124
125/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
126/// real move-immediate instructions to synthesize the immediate.
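/// As a rough illustration (a sketch of typical results, not taken from this
/// file; the exact sequence is chosen by AArch64_IMM::expandMOVImm below):
///
///   MOVi64imm x0, 0x0000ffff0000ffff   ; a repeating bit pattern may become a
///                                      ; single ORR with a logical immediate
///   MOVi64imm x0, 0x123456789abcdef0   ; an arbitrary constant generally needs
///                                      ; a MOVZ followed by up to three MOVKs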
127bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
128 MachineBasicBlock::iterator MBBI,
129 unsigned BitSize) {
130 MachineInstr &MI = *MBBI;
131 Register DstReg = MI.getOperand(0).getReg();
132 uint64_t RenamableState =
133 MI.getOperand(0).isRenamable() ? RegState::Renamable : 0;
134 uint64_t Imm = MI.getOperand(1).getImm();
135
136 if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
137 // Useless def, and we don't want to risk creating an invalid ORR (which
138 // would really write to sp).
139 MI.eraseFromParent();
140 return true;
141 }
142
143 SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
144 AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
145 assert(Insn.size() != 0);
146
147 SmallVector<MachineInstrBuilder, 4> MIBS;
148 for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
149 bool LastItem = std::next(I) == E;
150 switch (I->Opcode)
151 {
152 default: llvm_unreachable("unhandled!"); break;
153
154 case AArch64::ORRWri:
155 case AArch64::ORRXri:
156 if (I->Op1 == 0) {
157 MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
158 .add(MI.getOperand(0))
159 .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
160 .addImm(I->Op2));
161 } else {
162 Register DstReg = MI.getOperand(0).getReg();
163 bool DstIsDead = MI.getOperand(0).isDead();
164 MIBS.push_back(
165 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
166 .addReg(DstReg, RegState::Define |
167 getDeadRegState(DstIsDead && LastItem) |
168 RenamableState)
169 .addReg(DstReg)
170 .addImm(I->Op2));
171 }
172 break;
173 case AArch64::ORRWrs:
174 case AArch64::ORRXrs: {
175 Register DstReg = MI.getOperand(0).getReg();
176 bool DstIsDead = MI.getOperand(0).isDead();
177 MIBS.push_back(
178 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
179 .addReg(DstReg, RegState::Define |
180 getDeadRegState(DstIsDead && LastItem) |
181 RenamableState)
182 .addReg(DstReg)
183 .addReg(DstReg)
184 .addImm(I->Op2));
185 } break;
186 case AArch64::ANDXri:
187 case AArch64::EORXri:
188 if (I->Op1 == 0) {
189 MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
190 .add(MI.getOperand(0))
191 .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
192 .addImm(I->Op2));
193 } else {
194 Register DstReg = MI.getOperand(0).getReg();
195 bool DstIsDead = MI.getOperand(0).isDead();
196 MIBS.push_back(
197 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
198 .addReg(DstReg, RegState::Define |
199 getDeadRegState(DstIsDead && LastItem) |
200 RenamableState)
201 .addReg(DstReg)
202 .addImm(I->Op2));
203 }
204 break;
205 case AArch64::MOVNWi:
206 case AArch64::MOVNXi:
207 case AArch64::MOVZWi:
208 case AArch64::MOVZXi: {
209 bool DstIsDead = MI.getOperand(0).isDead();
210 MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
211 .addReg(DstReg, RegState::Define |
212 getDeadRegState(DstIsDead && LastItem) |
213 RenamableState)
214 .addImm(I->Op1)
215 .addImm(I->Op2));
216 } break;
217 case AArch64::MOVKWi:
218 case AArch64::MOVKXi: {
219 Register DstReg = MI.getOperand(0).getReg();
220 bool DstIsDead = MI.getOperand(0).isDead();
221 MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
222 .addReg(DstReg,
223 RegState::Define |
224 getDeadRegState(DstIsDead && LastItem) |
225 RenamableState)
226 .addReg(DstReg)
227 .addImm(I->Op1)
228 .addImm(I->Op2));
229 } break;
230 }
231 }
232 transferImpOps(MI, MIBS.front(), MIBS.back());
233 MI.eraseFromParent();
234 return true;
235}
236
237bool AArch64ExpandPseudo::expandCMP_SWAP(
238 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
239 unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
240 MachineBasicBlock::iterator &NextMBBI) {
241 MachineInstr &MI = *MBBI;
242 MIMetadata MIMD(MI);
243 const MachineOperand &Dest = MI.getOperand(0);
244 Register StatusReg = MI.getOperand(1).getReg();
245 bool StatusDead = MI.getOperand(1).isDead();
246 // Duplicating undef operands into 2 instructions does not guarantee the same
247 // value on both; however, undef should be replaced by xzr anyway.
248 assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
249 Register AddrReg = MI.getOperand(2).getReg();
250 Register DesiredReg = MI.getOperand(3).getReg();
251 Register NewReg = MI.getOperand(4).getReg();
252
253 MachineFunction *MF = MBB.getParent();
254 auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
255 auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
256 auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
257
258 MF->insert(++MBB.getIterator(), LoadCmpBB);
259 MF->insert(++LoadCmpBB->getIterator(), StoreBB);
260 MF->insert(++StoreBB->getIterator(), DoneBB);
261
262 // .Lloadcmp:
263 // mov wStatus, 0
264 // ldaxr xDest, [xAddr]
265 // cmp xDest, xDesired
266 // b.ne .Ldone
267 if (!StatusDead)
268 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::MOVZWi), StatusReg)
269 .addImm(0).addImm(0);
270 BuildMI(LoadCmpBB, MIMD, TII->get(LdarOp), Dest.getReg())
271 .addReg(AddrReg);
272 BuildMI(LoadCmpBB, MIMD, TII->get(CmpOp), ZeroReg)
273 .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
274 .addReg(DesiredReg)
275 .addImm(ExtendImm);
276 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::Bcc))
277 .addImm(AArch64CC::NE)
278 .addMBB(DoneBB)
279 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
280 LoadCmpBB->addSuccessor(DoneBB);
281 LoadCmpBB->addSuccessor(StoreBB);
282
283 // .Lstore:
284 // stlxr wStatus, xNew, [xAddr]
285 // cbnz wStatus, .Lloadcmp
286 BuildMI(StoreBB, MIMD, TII->get(StlrOp), StatusReg)
287 .addReg(NewReg)
288 .addReg(AddrReg);
289 BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
290 .addReg(StatusReg, getKillRegState(StatusDead))
291 .addMBB(LoadCmpBB);
292 StoreBB->addSuccessor(LoadCmpBB);
293 StoreBB->addSuccessor(DoneBB);
294
295 DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
296 DoneBB->transferSuccessors(&MBB);
297
298 MBB.addSuccessor(LoadCmpBB);
299
300 NextMBBI = MBB.end();
301 MI.eraseFromParent();
302
303 // Recompute livein lists.
304 LivePhysRegs LiveRegs;
305 computeAndAddLiveIns(LiveRegs, *DoneBB);
306 computeAndAddLiveIns(LiveRegs, *StoreBB);
307 computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
308 // Do an extra pass around the loop to get loop carried registers right.
309 StoreBB->clearLiveIns();
310 computeAndAddLiveIns(LiveRegs, *StoreBB);
311 LoadCmpBB->clearLiveIns();
312 computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
313
314 return true;
315}
316
317bool AArch64ExpandPseudo::expandCMP_SWAP_128(
318 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
319 MachineBasicBlock::iterator &NextMBBI) {
320 MachineInstr &MI = *MBBI;
321 MIMetadata MIMD(MI);
322 MachineOperand &DestLo = MI.getOperand(0);
323 MachineOperand &DestHi = MI.getOperand(1);
324 Register StatusReg = MI.getOperand(2).getReg();
325 bool StatusDead = MI.getOperand(2).isDead();
326 // Duplicating undef operands into 2 instructions does not guarantee the same
327 // value on both; however, undef should be replaced by xzr anyway.
328 assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
329 Register AddrReg = MI.getOperand(3).getReg();
330 Register DesiredLoReg = MI.getOperand(4).getReg();
331 Register DesiredHiReg = MI.getOperand(5).getReg();
332 Register NewLoReg = MI.getOperand(6).getReg();
333 Register NewHiReg = MI.getOperand(7).getReg();
334
335 unsigned LdxpOp, StxpOp;
336
337 switch (MI.getOpcode()) {
338 case AArch64::CMP_SWAP_128_MONOTONIC:
339 LdxpOp = AArch64::LDXPX;
340 StxpOp = AArch64::STXPX;
341 break;
342 case AArch64::CMP_SWAP_128_RELEASE:
343 LdxpOp = AArch64::LDXPX;
344 StxpOp = AArch64::STLXPX;
345 break;
346 case AArch64::CMP_SWAP_128_ACQUIRE:
347 LdxpOp = AArch64::LDAXPX;
348 StxpOp = AArch64::STXPX;
349 break;
350 case AArch64::CMP_SWAP_128:
351 LdxpOp = AArch64::LDAXPX;
352 StxpOp = AArch64::STLXPX;
353 break;
354 default:
355 llvm_unreachable("Unexpected opcode");
356 }
357
358 MachineFunction *MF = MBB.getParent();
359 auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
360 auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
361 auto FailBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
362 auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
363
364 MF->insert(++MBB.getIterator(), LoadCmpBB);
365 MF->insert(++LoadCmpBB->getIterator(), StoreBB);
366 MF->insert(++StoreBB->getIterator(), FailBB);
367 MF->insert(++FailBB->getIterator(), DoneBB);
368
369 // .Lloadcmp:
370 // ldaxp xDestLo, xDestHi, [xAddr]
371 // cmp xDestLo, xDesiredLo
372 // sbcs xDestHi, xDesiredHi
373 // b.ne .Ldone
374 BuildMI(LoadCmpBB, MIMD, TII->get(LdxpOp))
375 .addReg(DestLo.getReg(), RegState::Define)
376 .addReg(DestHi.getReg(), RegState::Define)
377 .addReg(AddrReg);
378 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
379 .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
380 .addReg(DesiredLoReg)
381 .addImm(0);
382 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
383 .addUse(AArch64::WZR)
384 .addUse(AArch64::WZR)
385 .addImm(AArch64CC::EQ);
386 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
387 .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
388 .addReg(DesiredHiReg)
389 .addImm(0);
390 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
391 .addUse(StatusReg, RegState::Kill)
392 .addUse(StatusReg, RegState::Kill)
393 .addImm(AArch64CC::EQ);
394 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CBNZW))
395 .addUse(StatusReg, getKillRegState(StatusDead))
396 .addMBB(FailBB);
397 LoadCmpBB->addSuccessor(FailBB);
398 LoadCmpBB->addSuccessor(StoreBB);
399
400 // .Lstore:
401 // stlxp wStatus, xNewLo, xNewHi, [xAddr]
402 // cbnz wStatus, .Lloadcmp
403 BuildMI(StoreBB, MIMD, TII->get(StxpOp), StatusReg)
404 .addReg(NewLoReg)
405 .addReg(NewHiReg)
406 .addReg(AddrReg);
407 BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
408 .addReg(StatusReg, getKillRegState(StatusDead))
409 .addMBB(LoadCmpBB);
410 BuildMI(StoreBB, MIMD, TII->get(AArch64::B)).addMBB(DoneBB);
411 StoreBB->addSuccessor(LoadCmpBB);
412 StoreBB->addSuccessor(DoneBB);
413
414 // .Lfail:
415 // stlxp wStatus, xDestLo, xDestHi, [xAddr]
416 // cbnz wStatus, .Lloadcmp
417 BuildMI(FailBB, MIMD, TII->get(StxpOp), StatusReg)
418 .addReg(DestLo.getReg())
419 .addReg(DestHi.getReg())
420 .addReg(AddrReg);
421 BuildMI(FailBB, MIMD, TII->get(AArch64::CBNZW))
422 .addReg(StatusReg, getKillRegState(StatusDead))
423 .addMBB(LoadCmpBB);
424 FailBB->addSuccessor(LoadCmpBB);
425 FailBB->addSuccessor(DoneBB);
426
427 DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
428 DoneBB->transferSuccessors(&MBB);
429
430 MBB.addSuccessor(LoadCmpBB);
431
432 NextMBBI = MBB.end();
433 MI.eraseFromParent();
434
435 // Recompute liveness bottom up.
436 LivePhysRegs LiveRegs;
437 computeAndAddLiveIns(LiveRegs, *DoneBB);
438 computeAndAddLiveIns(LiveRegs, *FailBB);
439 computeAndAddLiveIns(LiveRegs, *StoreBB);
440 computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
441
442 // Do an extra pass in the loop to get the loop carried dependencies right.
443 FailBB->clearLiveIns();
444 computeAndAddLiveIns(LiveRegs, *FailBB);
445 StoreBB->clearLiveIns();
446 computeAndAddLiveIns(LiveRegs, *StoreBB);
447 LoadCmpBB->clearLiveIns();
448 computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
449
450 return true;
451}
452
453/// \brief Expand Pseudos to Instructions with destructive operands.
454///
455/// This mechanism uses MOVPRFX instructions for zeroing the false lanes
456/// or for fixing relaxed register allocation conditions to comply with
457/// the instruction's register constraints. The latter case may be cheaper
458/// than setting the register constraints in the register allocator,
459/// since that will insert regular MOV instructions rather than MOVPRFX.
460///
461/// Example (after register allocation):
462///
463/// FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
464///
465/// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
466/// * We cannot map directly to FSUB_ZPmZ_B because the register
467/// constraints of the instruction are not met.
468/// * Also the _ZERO specifies the false lanes need to be zeroed.
469///
470/// We first try to see if the destructive operand == result operand,
471/// if not, we try to swap the operands, e.g.
472///
473/// FSUB_ZPmZ_B Z0, Pg/m, Z0, Z1
474///
475/// But because FSUB_ZPmZ is not commutative, this is semantically
476/// different, so we need a reverse instruction:
477///
478/// FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1
479///
480/// Then we implement the zeroing of the false lanes of Z0 by adding
481/// a zeroing MOVPRFX instruction:
482///
483/// MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
484/// FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1
485///
486/// Note that this can only be done for _ZERO or _UNDEF variants where
487/// we can guarantee the false lanes to be zeroed (by implementing this)
488/// or that they are undef (don't care / not used), otherwise the
489/// swapping of operands is illegal because the operation is not
490/// (or cannot be emulated to be) fully commutative.
491bool AArch64ExpandPseudo::expand_DestructiveOp(
492 MachineInstr &MI,
493 MachineBasicBlock &MBB,
494 MachineBasicBlock::iterator MBBI) {
495 unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
496 uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
497 uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
498 bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
499 Register DstReg = MI.getOperand(0).getReg();
500 bool DstIsDead = MI.getOperand(0).isDead();
501 bool UseRev = false;
502 unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;
503
504 switch (DType) {
505 case AArch64::DestructiveBinaryComm:
506 case AArch64::DestructiveBinaryCommWithRev:
507 if (DstReg == MI.getOperand(3).getReg()) {
508 // FSUB Zd, Pg, Zs1, Zd ==> FSUBR Zd, Pg/m, Zd, Zs1
509 std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
510 UseRev = true;
511 break;
512 }
513 [[fallthrough]];
514 case AArch64::DestructiveBinary:
515 case AArch64::DestructiveBinaryImm:
516 std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
517 break;
518 case AArch64::DestructiveUnaryPassthru:
519 std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 3, 3);
520 break;
521 case AArch64::DestructiveTernaryCommWithRev:
522 std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
523 if (DstReg == MI.getOperand(3).getReg()) {
524 // FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za
525 std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 3, 4, 2);
526 UseRev = true;
527 } else if (DstReg == MI.getOperand(4).getReg()) {
528 // FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za
529 std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 4, 3, 2);
530 UseRev = true;
531 }
532 break;
533 default:
534 llvm_unreachable("Unsupported Destructive Operand type");
535 }
536
537 // MOVPRFX can only be used if the destination operand
538 // is the destructive operand, not as any other operand,
539 // so the Destructive Operand must be unique.
540 bool DOPRegIsUnique = false;
541 switch (DType) {
542 case AArch64::DestructiveBinary:
543 DOPRegIsUnique = DstReg != MI.getOperand(SrcIdx).getReg();
544 break;
545 case AArch64::DestructiveBinaryComm:
546 case AArch64::DestructiveBinaryCommWithRev:
547 DOPRegIsUnique =
548 DstReg != MI.getOperand(DOPIdx).getReg() ||
549 MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
550 break;
551 case AArch64::DestructiveUnaryPassthru:
552 case AArch64::DestructiveBinaryImm:
553 DOPRegIsUnique = true;
554 break;
555 case AArch64::DestructiveTernaryCommWithRev:
556 DOPRegIsUnique =
557 DstReg != MI.getOperand(DOPIdx).getReg() ||
558 (MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg() &&
559 MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg());
560 break;
561 }
562
563 // Resolve the reverse opcode
564 if (UseRev) {
565 int NewOpcode;
566 // e.g. DIV -> DIVR
567 if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
568 Opcode = NewOpcode;
569 // e.g. DIVR -> DIV
570 else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
571 Opcode = NewOpcode;
572 }
573
574 // Get the right MOVPRFX
575 uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
576 unsigned MovPrfx, LSLZero, MovPrfxZero;
577 switch (ElementSize) {
578 case AArch64::ElementSizeNone:
579 case AArch64::ElementSizeB:
580 MovPrfx = AArch64::MOVPRFX_ZZ;
581 LSLZero = AArch64::LSL_ZPmI_B;
582 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
583 break;
584 case AArch64::ElementSizeH:
585 MovPrfx = AArch64::MOVPRFX_ZZ;
586 LSLZero = AArch64::LSL_ZPmI_H;
587 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
588 break;
589 case AArch64::ElementSizeS:
590 MovPrfx = AArch64::MOVPRFX_ZZ;
591 LSLZero = AArch64::LSL_ZPmI_S;
592 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
593 break;
594 case AArch64::ElementSizeD:
595 MovPrfx = AArch64::MOVPRFX_ZZ;
596 LSLZero = AArch64::LSL_ZPmI_D;
597 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
598 break;
599 default:
600 llvm_unreachable("Unsupported ElementSize");
601 }
602
603 //
604 // Create the destructive operation (if required)
605 //
606 MachineInstrBuilder PRFX, DOP;
607 if (FalseZero) {
608 // If we cannot prefix the requested instruction we'll instead emit a
609 // prefixed_zeroing_mov for DestructiveBinary.
610 assert((DOPRegIsUnique || DType == AArch64::DestructiveBinary ||
611 DType == AArch64::DestructiveBinaryComm ||
612 DType == AArch64::DestructiveBinaryCommWithRev) &&
613 "The destructive operand should be unique");
614 assert(ElementSize != AArch64::ElementSizeNone &&
615 "This instruction is unpredicated");
616
617 // Merge source operand into destination register
618 PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
619 .addReg(DstReg, RegState::Define)
620 .addReg(MI.getOperand(PredIdx).getReg())
621 .addReg(MI.getOperand(DOPIdx).getReg());
622
623 // After the movprfx, the destructive operand is the same as Dst.
624 DOPIdx = 0;
625
626 // Create the additional LSL to zero the lanes when the DstReg is not
627 // unique. Zeros the lanes in z0 that aren't active in p0 with sequence
628 // movprfx z0.b, p0/z, z0.b; lsl z0.b, p0/m, z0.b, #0;
629 if ((DType == AArch64::DestructiveBinary ||
630 DType == AArch64::DestructiveBinaryComm ||
631 DType == AArch64::DestructiveBinaryCommWithRev) &&
632 !DOPRegIsUnique) {
633 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LSLZero))
634 .addReg(DstReg, RegState::Define)
635 .add(MI.getOperand(PredIdx))
636 .addReg(DstReg)
637 .addImm(0);
638 }
639 } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
640 assert(DOPRegIsUnique && "The destructive operand should be unique");
641 PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
642 .addReg(DstReg, RegState::Define)
643 .addReg(MI.getOperand(DOPIdx).getReg());
644 DOPIdx = 0;
645 }
646
647 //
648 // Create the destructive operation
649 //
650 DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
651 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));
652
653 switch (DType) {
654 case AArch64::DestructiveUnaryPassthru:
655 DOP.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
656 .add(MI.getOperand(PredIdx))
657 .add(MI.getOperand(SrcIdx));
658 break;
659 case AArch64::DestructiveBinary:
660 case AArch64::DestructiveBinaryImm:
661 case AArch64::DestructiveBinaryComm:
662 case AArch64::DestructiveBinaryCommWithRev:
663 DOP.add(MI.getOperand(PredIdx))
664 .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
665 .add(MI.getOperand(SrcIdx));
666 break;
667 case AArch64::DestructiveTernaryCommWithRev:
668 DOP.add(MI.getOperand(PredIdx))
669 .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
670 .add(MI.getOperand(SrcIdx))
671 .add(MI.getOperand(Src2Idx));
672 break;
673 }
674
675 if (PRFX) {
676 finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
677 transferImpOps(MI, PRFX, DOP);
678 } else
679 transferImpOps(MI, DOP, DOP);
680
681 MI.eraseFromParent();
682 return true;
683}
684
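/// Expand the writeback tag-store loop pseudos (STGloop_wback / STZGloop_wback)
/// into an explicit loop over the tagged region. Roughly, the emitted sequence
/// is (an illustrative sketch; the exact operands are built below):
///
///   stg/stzg   xAddr, [xAddr], #16    ; only if Size is not a multiple of 32
///   mov        xSize, #Size
/// .Lloop:
///   st2g/stz2g xAddr, [xAddr], #32
///   subs       xSize, xSize, #32
///   b.ne       .Lloop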
685bool AArch64ExpandPseudo::expandSetTagLoop(
686 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
687 MachineBasicBlock::iterator &NextMBBI) {
688 MachineInstr &MI = *MBBI;
689 DebugLoc DL = MI.getDebugLoc();
690 Register SizeReg = MI.getOperand(0).getReg();
691 Register AddressReg = MI.getOperand(1).getReg();
692
693 MachineFunction *MF = MBB.getParent();
694
695 bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
696 const unsigned OpCode1 =
697 ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
698 const unsigned OpCode2 =
699 ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;
700
701 unsigned Size = MI.getOperand(2).getImm();
702 assert(Size > 0 && Size % 16 == 0);
703 if (Size % (16 * 2) != 0) {
704 BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
705 .addReg(AddressReg)
706 .addReg(AddressReg)
707 .addImm(1);
708 Size -= 16;
709 }
710 MachineBasicBlock::iterator I =
711 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
712 .addImm(Size);
713 expandMOVImm(MBB, I, 64);
714
715 auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
716 auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
717
718 MF->insert(++MBB.getIterator(), LoopBB);
719 MF->insert(++LoopBB->getIterator(), DoneBB);
720
721 BuildMI(LoopBB, DL, TII->get(OpCode2))
722 .addDef(AddressReg)
723 .addReg(AddressReg)
724 .addReg(AddressReg)
725 .addImm(2)
726 .cloneMemRefs(MI)
727 .setMIFlags(MI.getFlags());
728 BuildMI(LoopBB, DL, TII->get(AArch64::SUBSXri))
729 .addDef(SizeReg)
730 .addReg(SizeReg)
731 .addImm(16 * 2)
732 .addImm(0);
733 BuildMI(LoopBB, DL, TII->get(AArch64::Bcc))
734 .addImm(AArch64CC::NE)
735 .addMBB(LoopBB)
736 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
737
738 LoopBB->addSuccessor(LoopBB);
739 LoopBB->addSuccessor(DoneBB);
740
741 DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
742 DoneBB->transferSuccessors(&MBB);
743
744 MBB.addSuccessor(LoopBB);
745
746 NextMBBI = MBB.end();
747 MI.eraseFromParent();
748 // Recompute liveness bottom up.
749 LivePhysRegs LiveRegs;
750 computeAndAddLiveIns(LiveRegs, *DoneBB);
751 computeAndAddLiveIns(LiveRegs, *LoopBB);
752 // Do an extra pass in the loop to get the loop carried dependencies right.
753 // FIXME: is this necessary?
754 LoopBB->clearLiveIns();
755 computeAndAddLiveIns(LiveRegs, *LoopBB);
756 DoneBB->clearLiveIns();
757 computeAndAddLiveIns(LiveRegs, *DoneBB);
758
759 return true;
760}
761
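/// Expand a multi-register SVE spill/fill pseudo into N consecutive
/// single-register LDR/STR instructions on the tuple's sub-registers.
/// For example (an illustrative sketch):
///
///   STR_ZZXI z0_z1, [x0, #0]  ==>  str z0, [x0, #0, mul vl]
///                                  str z1, [x0, #1, mul vl]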
762bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
763 MachineBasicBlock::iterator MBBI,
764 unsigned Opc, unsigned N) {
765 assert((Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI ||
766 Opc == AArch64::LDR_PXI || Opc == AArch64::STR_PXI) &&
767 "Unexpected opcode");
768 unsigned RState = (Opc == AArch64::LDR_ZXI || Opc == AArch64::LDR_PXI)
769 ? RegState::Define
770 : 0;
771 unsigned sub0 = (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI)
772 ? AArch64::zsub0
773 : AArch64::psub0;
774 const TargetRegisterInfo *TRI =
775 MBB.getParent()->getSubtarget().getRegisterInfo();
776 MachineInstr &MI = *MBBI;
777 for (unsigned Offset = 0; Offset < N; ++Offset) {
778 int ImmOffset = MI.getOperand(2).getImm() + Offset;
779 bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
780 assert(ImmOffset >= -256 && ImmOffset < 256 &&
781 "Immediate spill offset out of range");
782 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
783 .addReg(TRI->getSubReg(MI.getOperand(0).getReg(), sub0 + Offset),
784 RState)
785 .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
786 .addImm(ImmOffset);
787 }
788 MI.eraseFromParent();
789 return true;
790}
791
792// Create a call with the passed opcode and explicit operands, copying over all
793// the implicit operands from *MBBI, starting at the regmask.
794static MachineInstr *createCallWithOps(MachineBasicBlock &MBB,
795 MachineBasicBlock::iterator MBBI,
796 const AArch64InstrInfo *TII,
797 unsigned Opcode,
798 ArrayRef<MachineOperand> ExplicitOps,
799 unsigned RegMaskStartIdx) {
800 // Build the MI, with explicit operands first (including the call target).
801 MachineInstr *Call = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(Opcode))
802 .add(ExplicitOps)
803 .getInstr();
804
805 // Register arguments are added during ISel, but cannot be added as explicit
806 // operands of the branch as it expects to be B <target> which is only one
807 // operand. Instead they are implicit operands used by the branch.
808 while (!MBBI->getOperand(RegMaskStartIdx).isRegMask()) {
809 const MachineOperand &MOP = MBBI->getOperand(RegMaskStartIdx);
810 assert(MOP.isReg() && "can only add register operands");
811 Call->addOperand(MachineOperand::CreateReg(
812 MOP.getReg(), /*Def=*/false, /*Implicit=*/true, /*isKill=*/false,
813 /*isDead=*/false, /*isUndef=*/MOP.isUndef()));
814 RegMaskStartIdx++;
815 }
816 for (const MachineOperand &MO :
817 llvm::drop_begin(MBBI->operands(), RegMaskStartIdx))
818 Call->addOperand(MO);
819
820 return Call;
821}
822
823// Create a call to CallTarget, copying over all the operands from *MBBI,
824// starting at the regmask.
825static MachineInstr *createCall(MachineBasicBlock &MBB,
826 MachineBasicBlock::iterator MBBI,
827 const AArch64InstrInfo *TII,
828 MachineOperand &CallTarget,
829 unsigned RegMaskStartIdx) {
830 unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
831
832 assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
833 "invalid operand for regular call");
834 return createCallWithOps(MBB, MBBI, TII, Opc, CallTarget, RegMaskStartIdx);
835}
836
837bool AArch64ExpandPseudo::expandCALL_RVMARKER(
838 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
839 // Expand CALL_RVMARKER pseudo to:
840 // - a branch to the call target, followed by
841 // - the special `mov x29, x29` marker, and
842 // - another branch, to the runtime function
843 // Mark the sequence as a bundle, to avoid passes moving other code in between.
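 // For a call with an attached retainRV/claimRV marker, the resulting bundle
 // looks roughly like this (an illustrative sketch; the runtime function is
 // taken from the pseudo's first operand):
 //   bl  <call target>
 //   mov x29, x29
 //   bl  <runtime function, e.g. objc_retainAutoreleasedReturnValue>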
844 MachineInstr &MI = *MBBI;
845 MachineOperand &RVTarget = MI.getOperand(0);
846 assert(RVTarget.isGlobal() && "invalid operand for attached call");
847
848 MachineInstr *OriginalCall = nullptr;
849
850 if (MI.getOpcode() == AArch64::BLRA_RVMARKER) {
851 // ptrauth call.
852 const MachineOperand &CallTarget = MI.getOperand(1);
853 const MachineOperand &Key = MI.getOperand(2);
854 const MachineOperand &IntDisc = MI.getOperand(3);
855 const MachineOperand &AddrDisc = MI.getOperand(4);
856
857 assert((Key.getImm() == AArch64PACKey::IA ||
858 Key.getImm() == AArch64PACKey::IB) &&
859 "Invalid auth call key");
860
861 MachineOperand Ops[] = {CallTarget, Key, IntDisc, AddrDisc};
862
863 OriginalCall = createCallWithOps(MBB, MBBI, TII, AArch64::BLRA, Ops,
864 /*RegMaskStartIdx=*/5);
865 } else {
866 assert(MI.getOpcode() == AArch64::BLR_RVMARKER && "unknown rvmarker MI");
867 OriginalCall = createCall(MBB, MBBI, TII, MI.getOperand(1),
868 // Regmask starts after the RV and call targets.
869 /*RegMaskStartIdx=*/2);
870 }
871
872 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
873 .addReg(AArch64::FP, RegState::Define)
874 .addReg(AArch64::XZR)
875 .addReg(AArch64::FP)
876 .addImm(0);
877
878 auto *RVCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BL))
879 .add(RVTarget)
880 .getInstr();
881
882 if (MI.shouldUpdateCallSiteInfo())
883 MBB.getParent()->moveCallSiteInfo(&MI, OriginalCall);
884
885 MI.eraseFromParent();
886 finalizeBundle(MBB, OriginalCall->getIterator(),
887 std::next(RVCall->getIterator()));
888 return true;
889}
890
891bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB,
892 MachineBasicBlock::iterator MBBI) {
893 // Expand CALL_BTI pseudo to:
894 // - a branch to the call target
895 // - a BTI instruction
896 // Mark the sequence as a bundle, to avoid passes moving other code in
897 // between.
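 // The resulting bundle is, roughly (an illustrative sketch):
 //   bl  <call target>
 //   bti j            ; HINT #36, so a setjmp-style BR can land here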
898 MachineInstr &MI = *MBBI;
899 MachineInstr *Call = createCall(MBB, MBBI, TII, MI.getOperand(0),
900 // Regmask starts after the call target.
901 /*RegMaskStartIdx=*/1);
902
903 Call->setCFIType(*MBB.getParent(), MI.getCFIType());
904
905 MachineInstr *BTI =
906 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT))
907 // BTI J so that setjmp can BR to this.
908 .addImm(36)
909 .getInstr();
910
911 if (MI.shouldUpdateCallSiteInfo())
912 MBB.getParent()->moveCallSiteInfo(&MI, Call);
913
914 MI.eraseFromParent();
915 finalizeBundle(MBB, Call->getIterator(), std::next(BTI->getIterator()));
916 return true;
917}
918
919bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
920 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
921 Register CtxReg = MBBI->getOperand(0).getReg();
922 Register BaseReg = MBBI->getOperand(1).getReg();
923 int Offset = MBBI->getOperand(2).getImm();
924 DebugLoc DL(MBBI->getDebugLoc());
925 auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();
926
927 if (STI.getTargetTriple().getArchName() != "arm64e") {
928 BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
929 .addUse(CtxReg)
930 .addUse(BaseReg)
931 .addImm(Offset / 8)
932 .setMIFlag(MachineInstr::FrameSetup);
933 MBBI->eraseFromParent();
934 return true;
935 }
936
937 // We need to sign the context in an address-discriminated way. 0xc31a is a
938 // fixed random value, chosen as part of the ABI.
939 // add x16, xBase, #Offset
940 // movk x16, #0xc31a, lsl #48
941 // mov x17, x22/xzr
942 // pacdb x17, x16
943 // str x17, [xBase, #Offset]
944 unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
945 BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
946 .addUse(BaseReg)
947 .addImm(abs(Offset))
948 .addImm(0)
949 .setMIFlag(MachineInstr::FrameSetup);
950 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
951 .addUse(AArch64::X16)
952 .addImm(0xc31a)
953 .addImm(48)
954 .setMIFlag(MachineInstr::FrameSetup);
955 // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried), so
956 // move it somewhere before signing.
957 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
958 .addUse(AArch64::XZR)
959 .addUse(CtxReg)
960 .addImm(0)
961 .setMIFlag(MachineInstr::FrameSetup);
962 BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
963 .addUse(AArch64::X17)
964 .addUse(AArch64::X16)
965 .setMIFlag(MachineInstr::FrameSetup);
966 BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
967 .addUse(AArch64::X17)
968 .addUse(BaseReg)
969 .addImm(Offset / 8)
970 .setMIFlag(MachineInstr::FrameSetup);
971
972 MBBI->eraseFromParent();
973 return true;
974}
975
976MachineBasicBlock *
977AArch64ExpandPseudo::expandRestoreZA(MachineBasicBlock &MBB,
978 MachineBasicBlock::iterator MBBI) {
979 MachineInstr &MI = *MBBI;
980 assert((std::next(MBBI) != MBB.end() ||
981 MI.getParent()->successors().begin() !=
982 MI.getParent()->successors().end()) &&
983 "Unexpected unreachable in block that restores ZA");
984
985 // Compare TPIDR2_EL0 value against 0.
986 DebugLoc DL = MI.getDebugLoc();
987 MachineInstrBuilder Cbz = BuildMI(MBB, MBBI, DL, TII->get(AArch64::CBZX))
988 .add(MI.getOperand(0));
989
990 // Split MBB and create two new blocks:
991 // - MBB now contains all instructions before RestoreZAPseudo.
992 // - SMBB contains the RestoreZAPseudo instruction only.
993 // - EndBB contains all instructions after RestoreZAPseudo.
994 MachineInstr &PrevMI = *std::prev(MBBI);
995 MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
996 MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end()
997 ? *SMBB->successors().begin()
998 : SMBB->splitAt(MI, /*UpdateLiveIns*/ true);
999
1000 // Add the SMBB label to the TB[N]Z instruction & create a branch to EndBB.
1001 Cbz.addMBB(SMBB);
1002 BuildMI(&MBB, DL, TII->get(AArch64::B))
1003 .addMBB(EndBB);
1004 MBB.addSuccessor(EndBB);
1005
1006 // Replace the pseudo with a call (BL).
1007 MachineInstrBuilder MIB =
1008 BuildMI(*SMBB, SMBB->end(), DL, TII->get(AArch64::BL));
1009 MIB.addReg(MI.getOperand(1).getReg(), RegState::Implicit);
1010 for (unsigned I = 2; I < MI.getNumOperands(); ++I)
1011 MIB.add(MI.getOperand(I));
1012 BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
1013
1014 MI.eraseFromParent();
1015 return EndBB;
1016}
1017
1018MachineBasicBlock *
1019AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB,
1020 MachineBasicBlock::iterator MBBI) {
1021 MachineInstr &MI = *MBBI;
1022 // In the case of a smstart/smstop before an unreachable, just remove the pseudo.
1023 // Exception handling code generated by Clang may introduce unreachables and it
1024 // seems unnecessary to restore pstate.sm when that happens. Note that it is
1025 // not just an optimisation, the code below expects a successor instruction/block
1026 // in order to split the block at MBBI.
1027 if (std::next(MBBI) == MBB.end() &&
1028 MI.getParent()->successors().begin() ==
1029 MI.getParent()->successors().end()) {
1030 MI.eraseFromParent();
1031 return &MBB;
1032 }
1033
1034 // Expand the pseudo into smstart or smstop instruction. The pseudo has the
1035 // following operands:
1036 //
1037 // MSRpstatePseudo <za|sm|both>, <0|1>, condition[, pstate.sm], <regmask>
1038 //
1039 // The pseudo is expanded into a conditional smstart/smstop, with a
1040 // check if pstate.sm (register) equals the expected value, and if not,
1041 // invokes the smstart/smstop.
1042 //
1043 // As an example, the following block contains a normal call from a
1044 // streaming-compatible function:
1045 //
1046 // OrigBB:
1047 // MSRpstatePseudo 3, 0, IfCallerIsStreaming, %0, <regmask> <- Cond SMSTOP
1048 // bl @normal_callee
1049 // MSRpstatePseudo 3, 1, IfCallerIsStreaming, %0, <regmask> <- Cond SMSTART
1050 //
1051 // ...which will be transformed into:
1052 //
1053 // OrigBB:
1054 // TBNZx %0:gpr64, 0, SMBB
1055 // b EndBB
1056 //
1057 // SMBB:
1058 // MSRpstatesvcrImm1 3, 0, <regmask> <- SMSTOP
1059 //
1060 // EndBB:
1061 // bl @normal_callee
1062 // MSRcond_pstatesvcrImm1 3, 1, <regmask> <- SMSTART
1063 //
1064 DebugLoc DL = MI.getDebugLoc();
1065
1066 // Create the conditional branch based on the third operand of the
1067 // instruction, which tells us if we are wrapping a normal or streaming
1068 // function.
1069 // We test the live value of pstate.sm and toggle pstate.sm if this is not the
1070 // expected value for the callee (0 for a normal callee and 1 for a streaming
1071 // callee).
1072 unsigned Opc;
1073 switch (MI.getOperand(2).getImm()) {
1074 case AArch64SME::Always:
1075 llvm_unreachable("Should have matched to instruction directly");
1076 case AArch64SME::IfCallerIsStreaming:
1077 Opc = AArch64::TBNZW;
1078 break;
1079 case AArch64SME::IfCallerIsNonStreaming:
1080 Opc = AArch64::TBZW;
1081 break;
1082 }
1083 auto PStateSM = MI.getOperand(3).getReg();
1084 auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
1085 unsigned SMReg32 = TRI->getSubReg(PStateSM, AArch64::sub_32);
1086 MachineInstrBuilder Tbx =
1087 BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(SMReg32).addImm(0);
1088
1089 // Split MBB and create two new blocks:
1090 // - MBB now contains all instructions before MSRcond_pstatesvcrImm1.
1091 // - SMBB contains the MSRcond_pstatesvcrImm1 instruction only.
1092 // - EndBB contains all instructions after MSRcond_pstatesvcrImm1.
1093 MachineInstr &PrevMI = *std::prev(MBBI);
1094 MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
1095 MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end()
1096 ? *SMBB->successors().begin()
1097 : SMBB->splitAt(MI, /*UpdateLiveIns*/ true);
1098
1099 // Add the SMBB label to the TB[N]Z instruction & create a branch to EndBB.
1100 Tbx.addMBB(SMBB);
1101 BuildMI(&MBB, DL, TII->get(AArch64::B))
1102 .addMBB(EndBB);
1103 MBB.addSuccessor(EndBB);
1104
1105 // Create the SMSTART/SMSTOP (MSRpstatesvcrImm1) instruction in SMBB.
1106 MachineInstrBuilder MIB = BuildMI(*SMBB, SMBB->begin(), MI.getDebugLoc(),
1107 TII->get(AArch64::MSRpstatesvcrImm1));
1108 // Copy all but the second and third operands of MSRcond_pstatesvcrImm1 (as
1109 // these contain the CopyFromReg for the first argument and the flag to
1110 // indicate whether the callee is streaming or normal).
1111 MIB.add(MI.getOperand(0));
1112 MIB.add(MI.getOperand(1));
1113 for (unsigned i = 4; i < MI.getNumOperands(); ++i)
1114 MIB.add(MI.getOperand(i));
1115
1116 BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
1117
1118 MI.eraseFromParent();
1119 return EndBB;
1120}
1121
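/// Expand a multi-vector load/store pseudo into either the contiguous or the
/// strided real instruction, depending on which register class the allocated
/// tuple register landed in.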
1122bool AArch64ExpandPseudo::expandMultiVecPseudo(
1123 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
1124 TargetRegisterClass ContiguousClass, TargetRegisterClass StridedClass,
1125 unsigned ContiguousOp, unsigned StridedOpc) {
1126 MachineInstr &MI = *MBBI;
1127 Register Tuple = MI.getOperand(0).getReg();
1128
1129 auto ContiguousRange = ContiguousClass.getRegisters();
1130 auto StridedRange = StridedClass.getRegisters();
1131 unsigned Opc;
1132 if (llvm::is_contained(ContiguousRange, Tuple.asMCReg())) {
1133 Opc = ContiguousOp;
1134 } else if (llvm::is_contained(StridedRange, Tuple.asMCReg())) {
1135 Opc = StridedOpc;
1136 } else
1137 llvm_unreachable("Cannot expand Multi-Vector pseudo");
1138
1139 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
1140 .add(MI.getOperand(0))
1141 .add(MI.getOperand(1))
1142 .add(MI.getOperand(2))
1143 .add(MI.getOperand(3));
1144 transferImpOps(MI, MIB, MIB);
1145 MI.eraseFromParent();
1146 return true;
1147}
1148
1149bool AArch64ExpandPseudo::expandFormTuplePseudo(
1150 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
1151 MachineBasicBlock::iterator &NextMBBI, unsigned Size) {
1152 assert((Size == 2 || Size == 4) && "Invalid Tuple Size");
1153 MachineInstr &MI = *MBBI;
1154 Register ReturnTuple = MI.getOperand(0).getReg();
1155
1156 const TargetRegisterInfo *TRI =
1157 MBB.getParent()->getSubtarget().getRegisterInfo();
1158 for (unsigned I = 0; I < Size; ++I) {
1159 Register FormTupleOpReg = MI.getOperand(I + 1).getReg();
1160 Register ReturnTupleSubReg =
1161 TRI->getSubReg(ReturnTuple, AArch64::zsub0 + I);
1162 // Add copies to ensure the subregisters remain in the correct order
1163 // for any contiguous operation they are used by.
1164 if (FormTupleOpReg != ReturnTupleSubReg)
1165 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORR_ZZZ))
1166 .addReg(ReturnTupleSubReg, RegState::Define)
1167 .addReg(FormTupleOpReg)
1168 .addReg(FormTupleOpReg);
1169 }
1170
1171 MI.eraseFromParent();
1172 return true;
1173}
1174
1175/// If MBBI references a pseudo instruction that should be expanded here,
1176/// do the expansion and return true. Otherwise return false.
1177bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
1178 MachineBasicBlock::iterator MBBI,
1179 MachineBasicBlock::iterator &NextMBBI) {
1180 MachineInstr &MI = *MBBI;
1181 unsigned Opcode = MI.getOpcode();
1182
1183 // Check if we can expand the destructive op
1184 int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
1185 if (OrigInstr != -1) {
1186 auto &Orig = TII->get(OrigInstr);
1187 if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask) !=
1188 AArch64::NotDestructive) {
1189 return expand_DestructiveOp(MI, MBB, MBBI);
1190 }
1191 }
1192
1193 switch (Opcode) {
1194 default:
1195 break;
1196
1197 case AArch64::BSPv8i8:
1198 case AArch64::BSPv16i8: {
1199 Register DstReg = MI.getOperand(0).getReg();
1200 if (DstReg == MI.getOperand(3).getReg()) {
1201 // Expand to BIT
1202 BuildMI(MBB, MBBI, MI.getDebugLoc(),
1203 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
1204 : AArch64::BITv16i8))
1205 .add(MI.getOperand(0))
1206 .add(MI.getOperand(3))
1207 .add(MI.getOperand(2))
1208 .add(MI.getOperand(1));
1209 } else if (DstReg == MI.getOperand(2).getReg()) {
1210 // Expand to BIF
1211 BuildMI(MBB, MBBI, MI.getDebugLoc(),
1212 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
1213 : AArch64::BIFv16i8))
1214 .add(MI.getOperand(0))
1215 .add(MI.getOperand(2))
1216 .add(MI.getOperand(3))
1217 .add(MI.getOperand(1));
1218 } else {
1219 // Expand to BSL, use additional move if required
1220 if (DstReg == MI.getOperand(1).getReg()) {
1221 BuildMI(MBB, MBBI, MI.getDebugLoc(),
1222 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
1223 : AArch64::BSLv16i8))
1224 .add(MI.getOperand(0))
1225 .add(MI.getOperand(1))
1226 .add(MI.getOperand(2))
1227 .add(MI.getOperand(3));
1228 } else {
1229 BuildMI(MBB, MBBI, MI.getDebugLoc(),
1230 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
1231 : AArch64::ORRv16i8))
1232 .addReg(DstReg,
1233 RegState::Define |
1234 getRenamableRegState(MI.getOperand(0).isRenamable()))
1235 .add(MI.getOperand(1))
1236 .add(MI.getOperand(1));
1237 BuildMI(MBB, MBBI, MI.getDebugLoc(),
1238 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
1239 : AArch64::BSLv16i8))
1240 .add(MI.getOperand(0))
1241 .addReg(DstReg,
1242 RegState::Kill |
1243 getRenamableRegState(MI.getOperand(0).isRenamable()))
1244 .add(MI.getOperand(2))
1245 .add(MI.getOperand(3));
1246 }
1247 }
1248 MI.eraseFromParent();
1249 return true;
1250 }
1251
1252 case AArch64::ADDWrr:
1253 case AArch64::SUBWrr:
1254 case AArch64::ADDXrr:
1255 case AArch64::SUBXrr:
1256 case AArch64::ADDSWrr:
1257 case AArch64::SUBSWrr:
1258 case AArch64::ADDSXrr:
1259 case AArch64::SUBSXrr:
1260 case AArch64::ANDWrr:
1261 case AArch64::ANDXrr:
1262 case AArch64::BICWrr:
1263 case AArch64::BICXrr:
1264 case AArch64::ANDSWrr:
1265 case AArch64::ANDSXrr:
1266 case AArch64::BICSWrr:
1267 case AArch64::BICSXrr:
1268 case AArch64::EONWrr:
1269 case AArch64::EONXrr:
1270 case AArch64::EORWrr:
1271 case AArch64::EORXrr:
1272 case AArch64::ORNWrr:
1273 case AArch64::ORNXrr:
1274 case AArch64::ORRWrr:
1275 case AArch64::ORRXrr: {
1276 unsigned Opcode;
1277 switch (MI.getOpcode()) {
1278 default:
1279 return false;
1280 case AArch64::ADDWrr: Opcode = AArch64::ADDWrs; break;
1281 case AArch64::SUBWrr: Opcode = AArch64::SUBWrs; break;
1282 case AArch64::ADDXrr: Opcode = AArch64::ADDXrs; break;
1283 case AArch64::SUBXrr: Opcode = AArch64::SUBXrs; break;
1284 case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break;
1285 case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break;
1286 case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break;
1287 case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break;
1288 case AArch64::ANDWrr: Opcode = AArch64::ANDWrs; break;
1289 case AArch64::ANDXrr: Opcode = AArch64::ANDXrs; break;
1290 case AArch64::BICWrr: Opcode = AArch64::BICWrs; break;
1291 case AArch64::BICXrr: Opcode = AArch64::BICXrs; break;
1292 case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break;
1293 case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break;
1294 case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break;
1295 case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break;
1296 case AArch64::EONWrr: Opcode = AArch64::EONWrs; break;
1297 case AArch64::EONXrr: Opcode = AArch64::EONXrs; break;
1298 case AArch64::EORWrr: Opcode = AArch64::EORWrs; break;
1299 case AArch64::EORXrr: Opcode = AArch64::EORXrs; break;
1300 case AArch64::ORNWrr: Opcode = AArch64::ORNWrs; break;
1301 case AArch64::ORNXrr: Opcode = AArch64::ORNXrs; break;
1302 case AArch64::ORRWrr: Opcode = AArch64::ORRWrs; break;
1303 case AArch64::ORRXrr: Opcode = AArch64::ORRXrs; break;
1304 }
1305 MachineFunction &MF = *MBB.getParent();
1306 // Try to create new inst without implicit operands added.
1307 MachineInstr *NewMI = MF.CreateMachineInstr(
1308 TII->get(Opcode), MI.getDebugLoc(), /*NoImplicit=*/true);
1309 MBB.insert(MBBI, NewMI);
1310 MachineInstrBuilder MIB1(MF, NewMI);
1311 MIB1->setPCSections(MF, MI.getPCSections());
1312 MIB1.addReg(MI.getOperand(0).getReg(), RegState::Define)
1313 .add(MI.getOperand(1))
1314 .add(MI.getOperand(2))
1315 .addImm(0);
1316 transferImpOps(MI, MIB1, MIB1);
1317 if (auto DebugNumber = MI.peekDebugInstrNum())
1318 NewMI->setDebugInstrNum(DebugNumber);
1319 MI.eraseFromParent();
1320 return true;
1321 }
1322
1323 case AArch64::LOADgot: {
1324 MachineFunction *MF = MBB.getParent();
1325 Register DstReg = MI.getOperand(0).getReg();
1326 const MachineOperand &MO1 = MI.getOperand(1);
1327 unsigned Flags = MO1.getTargetFlags();
1328
1329 if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
1330 // Tiny codemodel expand to LDR
1331 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
1332 TII->get(AArch64::LDRXl), DstReg);
1333
1334 if (MO1.isGlobal()) {
1335 MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
1336 } else if (MO1.isSymbol()) {
1337 MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
1338 } else {
1339 assert(MO1.isCPI() &&
1340 "Only expect globals, externalsymbols, or constant pools");
1341 MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
1342 }
1343 } else {
1344 // Small codemodel expand into ADRP + LDR.
1345 MachineFunction &MF = *MI.getParent()->getParent();
1346 DebugLoc DL = MI.getDebugLoc();
1347 MachineInstrBuilder MIB1 =
1348 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
1349
1350 MachineInstrBuilder MIB2;
1351 if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
1352 auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
1353 unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
1354 unsigned DstFlags = MI.getOperand(0).getTargetFlags();
1355 MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
1356 .addDef(Reg32)
1357 .addReg(DstReg, RegState::Kill)
1358 .addReg(DstReg, DstFlags | RegState::Implicit);
1359 } else {
1360 Register DstReg = MI.getOperand(0).getReg();
1361 MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
1362 .add(MI.getOperand(0))
1363 .addUse(DstReg, RegState::Kill);
1364 }
1365
1366 if (MO1.isGlobal()) {
1367 MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
1368 MIB2.addGlobalAddress(MO1.getGlobal(), 0,
1369 Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1370 } else if (MO1.isSymbol()) {
1371 MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
1372 MIB2.addExternalSymbol(MO1.getSymbolName(), Flags |
1373 AArch64II::MO_PAGEOFF |
1374 AArch64II::MO_NC);
1375 } else {
1376 assert(MO1.isCPI() &&
1377 "Only expect globals, externalsymbols, or constant pools");
1378 MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
1379 Flags | AArch64II::MO_PAGE);
1380 MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
1381 Flags | AArch64II::MO_PAGEOFF |
1382 AArch64II::MO_NC);
1383 }
1384
1385 transferImpOps(MI, MIB1, MIB2);
1386 }
1387 MI.eraseFromParent();
1388 return true;
1389 }
1390 case AArch64::MOVaddrBA: {
1391 MachineFunction &MF = *MI.getParent()->getParent();
1392 if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) {
1393 // blockaddress expressions have to come from a constant pool because the
1394 // largest addend (and hence offset within a function) allowed for ADRP is
1395 // only 8MB.
1396 const BlockAddress *BA = MI.getOperand(1).getBlockAddress();
1397 assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset");
1398
1399 MachineConstantPool *MCP = MF.getConstantPool();
1400 unsigned CPIdx = MCP->getConstantPoolIndex(BA, Align(8));
1401
1402 Register DstReg = MI.getOperand(0).getReg();
1403 auto MIB1 =
1404 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
1405 .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
1406 auto MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
1407 TII->get(AArch64::LDRXui), DstReg)
1408 .addUse(DstReg)
1409 .addConstantPoolIndex(
1410 CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1411 transferImpOps(MI, MIB1, MIB2);
1412 MI.eraseFromParent();
1413 return true;
1414 }
1415 }
1416 [[fallthrough]];
1417 case AArch64::MOVaddr:
1418 case AArch64::MOVaddrJT:
1419 case AArch64::MOVaddrCP:
1420 case AArch64::MOVaddrTLS:
1421 case AArch64::MOVaddrEXT: {
1422 // Expand into ADRP + ADD.
1423 Register DstReg = MI.getOperand(0).getReg();
1424 assert(DstReg != AArch64::XZR);
1425 MachineInstrBuilder MIB1 =
1426 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
1427 .add(MI.getOperand(1));
1428
1429 if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
1430 // MO_TAGGED on the page indicates a tagged address. Set the tag now.
1431 // We do so by creating a MOVK that sets bits 48-63 of the register to
1432 // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
1433 // the small code model so we can assume a binary size of <= 4GB, which
1434 // makes the untagged PC relative offset positive. The binary must also be
1435 // loaded into address range [0, 2^48). Both of these properties need to
1436 // be ensured at runtime when using tagged addresses.
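 // A sketch of the resulting sequence for a tagged global (illustrative only;
 // the relocation spellings are omitted):
 //   adrp xD, <sym page>
 //   movk xD, <(sym + 0x100000000 - PC) >> 48>, lsl #48
 //   add  xD, xD, <sym page offset>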
1437 auto Tag = MI.getOperand(1);
1438 Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
1439 Tag.setOffset(0x100000000);
1440 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
1441 .addReg(DstReg)
1442 .add(Tag)
1443 .addImm(48);
1444 }
1445
1446 MachineInstrBuilder MIB2 =
1447 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
1448 .add(MI.getOperand(0))
1449 .addReg(DstReg)
1450 .add(MI.getOperand(2))
1451 .addImm(0);
1452
1453 transferImpOps(MI, MIB1, MIB2);
1454 MI.eraseFromParent();
1455 return true;
1456 }
1457 case AArch64::ADDlowTLS:
1458 // Produce a plain ADD
1459 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
1460 .add(MI.getOperand(0))
1461 .add(MI.getOperand(1))
1462 .add(MI.getOperand(2))
1463 .addImm(0);
1464 MI.eraseFromParent();
1465 return true;
1466
1467 case AArch64::MOVbaseTLS: {
1468 Register DstReg = MI.getOperand(0).getReg();
1469 auto SysReg = AArch64SysReg::TPIDR_EL0;
1470 MachineFunction *MF = MBB.getParent();
1471 if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
1472 SysReg = AArch64SysReg::TPIDR_EL3;
1473 else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
1474 SysReg = AArch64SysReg::TPIDR_EL2;
1475 else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
1476 SysReg = AArch64SysReg::TPIDR_EL1;
1477 else if (MF->getSubtarget<AArch64Subtarget>().useROEL0ForTP())
1478 SysReg = AArch64SysReg::TPIDRRO_EL0;
1479 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
1480 .addImm(SysReg);
1481 MI.eraseFromParent();
1482 return true;
1483 }
1484
1485 case AArch64::MOVi32imm:
1486 return expandMOVImm(MBB, MBBI, 32);
1487 case AArch64::MOVi64imm:
1488 return expandMOVImm(MBB, MBBI, 64);
1489 case AArch64::RET_ReallyLR: {
1490 // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
1491 // function and missing live-ins. We are fine in practice because callee
1492 // saved register handling ensures the register value is restored before
1493 // RET, but we need the undef flag here to appease the MachineVerifier
1494 // liveness checks.
1495 MachineInstrBuilder MIB =
1496 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
1497 .addReg(AArch64::LR, RegState::Undef);
1498 transferImpOps(MI, MIB, MIB);
1499 MI.eraseFromParent();
1500 return true;
1501 }
1502 case AArch64::CMP_SWAP_8:
1503 return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
1504 AArch64::SUBSWrx,
1505 AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
1506 AArch64::WZR, NextMBBI);
1507 case AArch64::CMP_SWAP_16:
1508 return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
1509 AArch64::SUBSWrx,
1510 AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
1511 AArch64::WZR, NextMBBI);
1512 case AArch64::CMP_SWAP_32:
1513 return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
1514 AArch64::SUBSWrs,
1515 AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
1516 AArch64::WZR, NextMBBI);
1517 case AArch64::CMP_SWAP_64:
1518 return expandCMP_SWAP(MBB, MBBI,
1519 AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
1520 AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
1521 AArch64::XZR, NextMBBI);
1522 case AArch64::CMP_SWAP_128:
1523 case AArch64::CMP_SWAP_128_RELEASE:
1524 case AArch64::CMP_SWAP_128_ACQUIRE:
1525 case AArch64::CMP_SWAP_128_MONOTONIC:
1526 return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);
1527
1528 case AArch64::AESMCrrTied:
1529 case AArch64::AESIMCrrTied: {
1530 MachineInstrBuilder MIB =
1531 BuildMI(MBB, MBBI, MI.getDebugLoc(),
1532 TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr :
1533 AArch64::AESIMCrr))
1534 .add(MI.getOperand(0))
1535 .add(MI.getOperand(1));
1536 transferImpOps(MI, MIB, MIB);
1537 MI.eraseFromParent();
1538 return true;
1539 }
1540 case AArch64::IRGstack: {
1541 MachineFunction &MF = *MBB.getParent();
1542 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1543 const AArch64FrameLowering *TFI =
1544 MF.getSubtarget<AArch64Subtarget>().getFrameLowering();
1545
1546 // IRG does not allow immediate offset. getTaggedBasePointerOffset should
1547 // almost always point to SP-after-prologue; if not, emit a longer
1548 // instruction sequence.
1549 int BaseOffset = -AFI->getTaggedBasePointerOffset();
1550 Register FrameReg;
1551 StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
1552 MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
1553 /*PreferFP=*/false,
1554 /*ForSimm=*/true);
1555 Register SrcReg = FrameReg;
1556 if (FrameRegOffset) {
1557 // Use output register as temporary.
1558 SrcReg = MI.getOperand(0).getReg();
1559 emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
1560 FrameRegOffset, TII);
1561 }
1562 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
1563 .add(MI.getOperand(0))
1564 .addUse(SrcReg)
1565 .add(MI.getOperand(2));
1566 MI.eraseFromParent();
1567 return true;
1568 }
1569 case AArch64::TAGPstack: {
1570 int64_t Offset = MI.getOperand(2).getImm();
1571 BuildMI(MBB, MBBI, MI.getDebugLoc(),
1572 TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
1573 .add(MI.getOperand(0))
1574 .add(MI.getOperand(1))
1575 .addImm(std::abs(Offset))
1576 .add(MI.getOperand(4));
1577 MI.eraseFromParent();
1578 return true;
1579 }
1580 case AArch64::STGloop_wback:
1581 case AArch64::STZGloop_wback:
1582 return expandSetTagLoop(MBB, MBBI, NextMBBI);
1583 case AArch64::STGloop:
1584 case AArch64::STZGloop:
1586 "Non-writeback variants of STGloop / STZGloop should not "
1587 "survive past PrologEpilogInserter.");
1588 case AArch64::STR_ZZZZXI:
1589 return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
1590 case AArch64::STR_ZZZXI:
1591 return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
1592 case AArch64::STR_ZZXI:
1593 return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
1594 case AArch64::STR_PPXI:
1595 return expandSVESpillFill(MBB, MBBI, AArch64::STR_PXI, 2);
1596 case AArch64::LDR_ZZZZXI:
1597 return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
1598 case AArch64::LDR_ZZZXI:
1599 return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
1600 case AArch64::LDR_ZZXI:
1601 return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
1602 case AArch64::LDR_PPXI:
1603 return expandSVESpillFill(MBB, MBBI, AArch64::LDR_PXI, 2);
1604 case AArch64::BLR_RVMARKER:
1605 case AArch64::BLRA_RVMARKER:
1606 return expandCALL_RVMARKER(MBB, MBBI);
1607 case AArch64::BLR_BTI:
1608 return expandCALL_BTI(MBB, MBBI);
1609 case AArch64::StoreSwiftAsyncContext:
1610 return expandStoreSwiftAsyncContext(MBB, MBBI);
1611 case AArch64::RestoreZAPseudo: {
1612 auto *NewMBB = expandRestoreZA(MBB, MBBI);
1613 if (NewMBB != &MBB)
1614 NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
1615 return true;
1616 }
1617 case AArch64::MSRpstatePseudo: {
1618 auto *NewMBB = expandCondSMToggle(MBB, MBBI);
1619 if (NewMBB != &MBB)
1620 NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
1621 return true;
1622 }
1623 case AArch64::COALESCER_BARRIER_FPR16:
1624 case AArch64::COALESCER_BARRIER_FPR32:
1625 case AArch64::COALESCER_BARRIER_FPR64:
1626 case AArch64::COALESCER_BARRIER_FPR128:
1627 MI.eraseFromParent();
1628 return true;
1629 case AArch64::LD1B_2Z_IMM_PSEUDO:
1630 return expandMultiVecPseudo(
1631 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1632 AArch64::LD1B_2Z_IMM, AArch64::LD1B_2Z_STRIDED_IMM);
1633 case AArch64::LD1H_2Z_IMM_PSEUDO:
1634 return expandMultiVecPseudo(
1635 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1636 AArch64::LD1H_2Z_IMM, AArch64::LD1H_2Z_STRIDED_IMM);
1637 case AArch64::LD1W_2Z_IMM_PSEUDO:
1638 return expandMultiVecPseudo(
1639 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1640 AArch64::LD1W_2Z_IMM, AArch64::LD1W_2Z_STRIDED_IMM);
1641 case AArch64::LD1D_2Z_IMM_PSEUDO:
1642 return expandMultiVecPseudo(
1643 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1644 AArch64::LD1D_2Z_IMM, AArch64::LD1D_2Z_STRIDED_IMM);
1645 case AArch64::LDNT1B_2Z_IMM_PSEUDO:
1646 return expandMultiVecPseudo(
1647 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1648 AArch64::LDNT1B_2Z_IMM, AArch64::LDNT1B_2Z_STRIDED_IMM);
1649 case AArch64::LDNT1H_2Z_IMM_PSEUDO:
1650 return expandMultiVecPseudo(
1651 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1652 AArch64::LDNT1H_2Z_IMM, AArch64::LDNT1H_2Z_STRIDED_IMM);
1653 case AArch64::LDNT1W_2Z_IMM_PSEUDO:
1654 return expandMultiVecPseudo(
1655 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1656 AArch64::LDNT1W_2Z_IMM, AArch64::LDNT1W_2Z_STRIDED_IMM);
1657 case AArch64::LDNT1D_2Z_IMM_PSEUDO:
1658 return expandMultiVecPseudo(
1659 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1660 AArch64::LDNT1D_2Z_IMM, AArch64::LDNT1D_2Z_STRIDED_IMM);
1661 case AArch64::LD1B_2Z_PSEUDO:
1662 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1663 AArch64::ZPR2StridedRegClass, AArch64::LD1B_2Z,
1664 AArch64::LD1B_2Z_STRIDED);
1665 case AArch64::LD1H_2Z_PSEUDO:
1666 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1667 AArch64::ZPR2StridedRegClass, AArch64::LD1H_2Z,
1668 AArch64::LD1H_2Z_STRIDED);
1669 case AArch64::LD1W_2Z_PSEUDO:
1670 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1671 AArch64::ZPR2StridedRegClass, AArch64::LD1W_2Z,
1672 AArch64::LD1W_2Z_STRIDED);
1673 case AArch64::LD1D_2Z_PSEUDO:
1674 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1675 AArch64::ZPR2StridedRegClass, AArch64::LD1D_2Z,
1676 AArch64::LD1D_2Z_STRIDED);
1677 case AArch64::LDNT1B_2Z_PSEUDO:
1678 return expandMultiVecPseudo(
1679 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1680 AArch64::LDNT1B_2Z, AArch64::LDNT1B_2Z_STRIDED);
1681 case AArch64::LDNT1H_2Z_PSEUDO:
1682 return expandMultiVecPseudo(
1683 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1684 AArch64::LDNT1H_2Z, AArch64::LDNT1H_2Z_STRIDED);
1685 case AArch64::LDNT1W_2Z_PSEUDO:
1686 return expandMultiVecPseudo(
1687 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1688 AArch64::LDNT1W_2Z, AArch64::LDNT1W_2Z_STRIDED);
1689 case AArch64::LDNT1D_2Z_PSEUDO:
1690 return expandMultiVecPseudo(
1691 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1692 AArch64::LDNT1D_2Z, AArch64::LDNT1D_2Z_STRIDED);
1693 case AArch64::LD1B_4Z_IMM_PSEUDO:
1694 return expandMultiVecPseudo(
1695 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1696 AArch64::LD1B_4Z_IMM, AArch64::LD1B_4Z_STRIDED_IMM);
1697 case AArch64::LD1H_4Z_IMM_PSEUDO:
1698 return expandMultiVecPseudo(
1699 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1700 AArch64::LD1H_4Z_IMM, AArch64::LD1H_4Z_STRIDED_IMM);
1701 case AArch64::LD1W_4Z_IMM_PSEUDO:
1702 return expandMultiVecPseudo(
1703 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1704 AArch64::LD1W_4Z_IMM, AArch64::LD1W_4Z_STRIDED_IMM);
1705 case AArch64::LD1D_4Z_IMM_PSEUDO:
1706 return expandMultiVecPseudo(
1707 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1708 AArch64::LD1D_4Z_IMM, AArch64::LD1D_4Z_STRIDED_IMM);
1709 case AArch64::LDNT1B_4Z_IMM_PSEUDO:
1710 return expandMultiVecPseudo(
1711 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1712 AArch64::LDNT1B_4Z_IMM, AArch64::LDNT1B_4Z_STRIDED_IMM);
1713 case AArch64::LDNT1H_4Z_IMM_PSEUDO:
1714 return expandMultiVecPseudo(
1715 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1716 AArch64::LDNT1H_4Z_IMM, AArch64::LDNT1H_4Z_STRIDED_IMM);
1717 case AArch64::LDNT1W_4Z_IMM_PSEUDO:
1718 return expandMultiVecPseudo(
1719 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1720 AArch64::LDNT1W_4Z_IMM, AArch64::LDNT1W_4Z_STRIDED_IMM);
1721 case AArch64::LDNT1D_4Z_IMM_PSEUDO:
1722 return expandMultiVecPseudo(
1723 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1724 AArch64::LDNT1D_4Z_IMM, AArch64::LDNT1D_4Z_STRIDED_IMM);
1725 case AArch64::LD1B_4Z_PSEUDO:
1726 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1727 AArch64::ZPR4StridedRegClass, AArch64::LD1B_4Z,
1728 AArch64::LD1B_4Z_STRIDED);
1729 case AArch64::LD1H_4Z_PSEUDO:
1730 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1731 AArch64::ZPR4StridedRegClass, AArch64::LD1H_4Z,
1732 AArch64::LD1H_4Z_STRIDED);
1733 case AArch64::LD1W_4Z_PSEUDO:
1734 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1735 AArch64::ZPR4StridedRegClass, AArch64::LD1W_4Z,
1736 AArch64::LD1W_4Z_STRIDED);
1737 case AArch64::LD1D_4Z_PSEUDO:
1738 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1739 AArch64::ZPR4StridedRegClass, AArch64::LD1D_4Z,
1740 AArch64::LD1D_4Z_STRIDED);
1741 case AArch64::LDNT1B_4Z_PSEUDO:
1742 return expandMultiVecPseudo(
1743 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1744 AArch64::LDNT1B_4Z, AArch64::LDNT1B_4Z_STRIDED);
1745 case AArch64::LDNT1H_4Z_PSEUDO:
1746 return expandMultiVecPseudo(
1747 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1748 AArch64::LDNT1H_4Z, AArch64::LDNT1H_4Z_STRIDED);
1749 case AArch64::LDNT1W_4Z_PSEUDO:
1750 return expandMultiVecPseudo(
1751 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1752 AArch64::LDNT1W_4Z, AArch64::LDNT1W_4Z_STRIDED);
1753 case AArch64::LDNT1D_4Z_PSEUDO:
1754 return expandMultiVecPseudo(
1755 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1756 AArch64::LDNT1D_4Z, AArch64::LDNT1D_4Z_STRIDED);
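// FORM_TRANSPOSED_REG_TUPLE_X2/_X4 gather two or four individual Z registers
// into a ZPR2/ZPR4 tuple; expandFormTuplePseudo materializes the tuple from
// the pseudo's source operands, with the last argument giving the tuple size.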
1757 case AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO:
1758 return expandFormTuplePseudo(MBB, MBBI, NextMBBI, 2);
1759 case AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO:
1760 return expandFormTuplePseudo(MBB, MBBI, NextMBBI, 4);
1761 }
1762 return false;
1763}
1764
1765/// Iterate over the instructions in basic block MBB and expand any
1766/// pseudo instructions. Return true if anything was modified.
1767bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
1768 bool Modified = false;
1769
1770 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1771 while (MBBI != E) {
1772 MachineBasicBlock::iterator NMBBI = std::next(MBBI);
1773 Modified |= expandMI(MBB, MBBI, NMBBI);
1774 MBBI = NMBBI;
1775 }
1776
1777 return Modified;
1778}
1779
1780bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
1781 TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
1782
1783 bool Modified = false;
1784 for (auto &MBB : MF)
1785 Modified |= expandMBB(MBB);
1786 return Modified;
1787}
1788
1789/// Returns an instance of the pseudo instruction expansion pass.
1790FunctionPass *llvm::createAArch64ExpandPseudoPass() {
1791 return new AArch64ExpandPseudo();
1792}
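// For reference, the AArch64 target pipeline schedules this pass through the
// factory above, roughly as follows (sketch; the exact hook lives in
// AArch64TargetMachine.cpp and may differ between releases):
//   addPass(createAArch64ExpandPseudoPass());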