LLVM 20.0.0git
AArch64ExpandPseudoInsts.cpp
Go to the documentation of this file.
1//===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass that expands pseudo instructions into target
10// instructions to allow proper scheduling and other late optimizations. This
11// pass should be run after register allocation but before the post-regalloc
12// scheduling pass.
13//
14//===----------------------------------------------------------------------===//
15
16#include "AArch64ExpandImm.h"
17#include "AArch64InstrInfo.h"
19#include "AArch64Subtarget.h"
31#include "llvm/IR/DebugLoc.h"
32#include "llvm/MC/MCInstrDesc.h"
33#include "llvm/Pass.h"
38#include <cassert>
39#include <cstdint>
40#include <iterator>
41#include <utility>
42
43using namespace llvm;
44
45#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"
46
47namespace {
48
// Late machine pass that expands AArch64 pseudo instructions into real
// target instructions; per the file banner it runs after register
// allocation but before post-RA scheduling.
// NOTE(review): this capture has dropped several declaration lines (the
// embedded numbering skips 56, 65-66, 68, 72, 76-77, 80, 82-83, 85-86,
// 88, 91-92, 94, 96, 98), so some member signatures below are visibly
// truncated.
49class AArch64ExpandPseudo : public MachineFunctionPass {
50public:
  // Target instruction info used by all expansion helpers; presumably
  // initialized in runOnMachineFunction — TODO confirm (initializer not
  // visible in this capture).
51 const AArch64InstrInfo *TII;
52
  // Pass identification; the address of ID is the pass's unique key.
53 static char ID;
54
55 AArch64ExpandPseudo() : MachineFunctionPass(ID) {
57 }
58
59 bool runOnMachineFunction(MachineFunction &Fn) override;
60
61 StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }
62
63private:
  // Per-basic-block driver for the expansion.
64 bool expandMBB(MachineBasicBlock &MBB);
  // Picks between contiguous- and strided-register forms of a multi-vector
  // pseudo (signature truncated here).
67 bool expandMultiVecPseudo(MachineBasicBlock &MBB,
69 TargetRegisterClass ContiguousClass,
70 TargetRegisterClass StridedClass,
71 unsigned ContiguousOpc, unsigned StridedOpc);
73 unsigned BitSize);
74
  // Expands SVE pseudos with a destructive operand (MOVPRFX-based); see the
  // block comment on the definition further down in this file.
75 bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
78 unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
79 unsigned ExtendImm, unsigned ZeroReg,
81 bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
84 bool expandSetTagLoop(MachineBasicBlock &MBB,
87 bool expandSVESpillFill(MachineBasicBlock &MBB,
89 unsigned N);
90 bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
93 bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
95 MachineBasicBlock *expandRestoreZA(MachineBasicBlock &MBB,
97 MachineBasicBlock *expandCondSMToggle(MachineBasicBlock &MBB,
99};
100
101} // end anonymous namespace
102
// Storage for the pass ID and registration with the pass registry under the
// command-line name "aarch64-expand-pseudo".
103char AArch64ExpandPseudo::ID = 0;
104
105INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
106 AARCH64_EXPAND_PSEUDO_NAME, false, false)
107
108/// Transfer implicit operands on the pseudo instruction to the
109/// instructions created from the expansion.
// NOTE(review): the second signature line (original line 111, declaring the
// DefMI builder parameter) was dropped by this capture.
110static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
112 const MCInstrDesc &Desc = OldMI.getDesc();
  // Iterate only the operands beyond those declared by the MCInstrDesc,
  // i.e. the implicit operands appended to the pseudo.
113 for (const MachineOperand &MO :
114 llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) {
115 assert(MO.isReg() && MO.getReg());
  // Implicit uses are attached to UseMI and implicit defs to DefMI; callers
  // in this file pass the first and last instruction of the expansion
  // respectively (e.g. transferImpOps(MI, MIBS.front(), MIBS.back())).
116 if (MO.isUse())
117 UseMI.add(MO);
118 else
119 DefMI.add(MO);
120 }
121}
122
123/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
124/// real move-immediate instructions to synthesize the immediate.
// Expand MOVi32imm/MOVi64imm: ask AArch64_IMM::expandMOVImm for an
// ORR/AND/EOR/MOVN/MOVZ/MOVK sequence and materialize it, preserving
// dead/renamable state on the final write of the destination.
// NOTE(review): this capture elided original lines 126 (iterator parameter),
// 141/145 (the SmallVector declarations for Insn and MIBS) and 221 (the
// RegState flag line in the MOVK builder).
125bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
127 unsigned BitSize) {
128 MachineInstr &MI = *MBBI;
129 Register DstReg = MI.getOperand(0).getReg();
  // Keep the renamable bit so the expansion doesn't pessimize later passes.
130 uint64_t RenamableState =
131 MI.getOperand(0).isRenamable() ? RegState::Renamable : 0;
132 uint64_t Imm = MI.getOperand(1).getImm();
133
134 if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
135 // Useless def, and we don't want to risk creating an invalid ORR (which
136 // would really write to sp).
137 MI.eraseFromParent();
138 return true;
139 }
140
  // Compute the instruction sequence for the immediate.
142 AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
143 assert(Insn.size() != 0);
144
146 for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
  // Only the last instruction of the sequence may mark the dest as dead.
147 bool LastItem = std::next(I) == E;
148 switch (I->Opcode)
149 {
150 default: llvm_unreachable("unhandled!"); break;
151
  // Logical-immediate forms: Op1 == 0 means the source is the zero
  // register, otherwise the instruction reads the partially-built dest.
152 case AArch64::ORRWri:
153 case AArch64::ORRXri:
154 if (I->Op1 == 0) {
155 MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
156 .add(MI.getOperand(0))
157 .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
158 .addImm(I->Op2));
159 } else {
160 Register DstReg = MI.getOperand(0).getReg();
161 bool DstIsDead = MI.getOperand(0).isDead();
162 MIBS.push_back(
163 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
164 .addReg(DstReg, RegState::Define |
165 getDeadRegState(DstIsDead && LastItem) |
166 RenamableState)
167 .addReg(DstReg)
168 .addImm(I->Op2));
169 }
170 break;
171 case AArch64::ORRWrs:
172 case AArch64::ORRXrs: {
173 Register DstReg = MI.getOperand(0).getReg();
174 bool DstIsDead = MI.getOperand(0).isDead();
175 MIBS.push_back(
176 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
177 .addReg(DstReg, RegState::Define |
178 getDeadRegState(DstIsDead && LastItem) |
179 RenamableState)
180 .addReg(DstReg)
181 .addReg(DstReg)
182 .addImm(I->Op2));
183 } break;
184 case AArch64::ANDXri:
185 case AArch64::EORXri:
186 if (I->Op1 == 0) {
187 MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
188 .add(MI.getOperand(0))
189 .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
190 .addImm(I->Op2));
191 } else {
192 Register DstReg = MI.getOperand(0).getReg();
193 bool DstIsDead = MI.getOperand(0).isDead();
194 MIBS.push_back(
195 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
196 .addReg(DstReg, RegState::Define |
197 getDeadRegState(DstIsDead && LastItem) |
198 RenamableState)
199 .addReg(DstReg)
200 .addImm(I->Op2));
201 }
202 break;
  // Move-wide forms carry the shifted 16-bit chunk (Op1) and shift (Op2).
203 case AArch64::MOVNWi:
204 case AArch64::MOVNXi:
205 case AArch64::MOVZWi:
206 case AArch64::MOVZXi: {
207 bool DstIsDead = MI.getOperand(0).isDead();
208 MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
209 .addReg(DstReg, RegState::Define |
210 getDeadRegState(DstIsDead && LastItem) |
211 RenamableState)
212 .addImm(I->Op1)
213 .addImm(I->Op2));
214 } break;
  // MOVK both reads and writes the destination register.
215 case AArch64::MOVKWi:
216 case AArch64::MOVKXi: {
217 Register DstReg = MI.getOperand(0).getReg();
218 bool DstIsDead = MI.getOperand(0).isDead();
219 MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
220 .addReg(DstReg,
222 getDeadRegState(DstIsDead && LastItem) |
223 RenamableState)
224 .addReg(DstReg)
225 .addImm(I->Op1)
226 .addImm(I->Op2));
227 } break;
228 }
229 }
  // Implicit uses go on the first emitted instruction, defs on the last.
230 transferImpOps(MI, MIBS.front(), MIBS.back());
231 MI.eraseFromParent();
232 return true;
233}
234
// Expand a CMP_SWAP pseudo into an explicit load-exclusive/compare/
// store-exclusive loop across three new blocks (LoadCmp, Store, Done), then
// recompute live-ins for the new blocks.
// NOTE(review): this capture elided original lines 236 (rest of the
// signature, including the LdarOp parameter used below), 251 (presumably
// the `MachineFunction *MF = ...` definition — MF is used but not declared
// here) and 275 (the Bcc condition-code operand, per the b.ne in the
// comment).
235bool AArch64ExpandPseudo::expandCMP_SWAP(
237 unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
238 MachineBasicBlock::iterator &NextMBBI) {
239 MachineInstr &MI = *MBBI;
240 MIMetadata MIMD(MI);
241 const MachineOperand &Dest = MI.getOperand(0);
242 Register StatusReg = MI.getOperand(1).getReg();
243 bool StatusDead = MI.getOperand(1).isDead();
244 // Duplicating undef operands into 2 instructions does not guarantee the same
245 // value on both; However undef should be replaced by xzr anyway.
246 assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
247 Register AddrReg = MI.getOperand(2).getReg();
248 Register DesiredReg = MI.getOperand(3).getReg();
249 Register NewReg = MI.getOperand(4).getReg();
250
252 auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
253 auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
254 auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
255
256 MF->insert(++MBB.getIterator(), LoadCmpBB);
257 MF->insert(++LoadCmpBB->getIterator(), StoreBB);
258 MF->insert(++StoreBB->getIterator(), DoneBB);
259
260 // .Lloadcmp:
261 // mov wStatus, 0
262 // ldaxr xDest, [xAddr]
263 // cmp xDest, xDesired
264 // b.ne .Ldone
265 if (!StatusDead)
266 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::MOVZWi), StatusReg)
267 .addImm(0).addImm(0);
268 BuildMI(LoadCmpBB, MIMD, TII->get(LdarOp), Dest.getReg())
269 .addReg(AddrReg);
270 BuildMI(LoadCmpBB, MIMD, TII->get(CmpOp), ZeroReg)
271 .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
272 .addReg(DesiredReg)
273 .addImm(ExtendImm)
274 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::Bcc))
276 .addMBB(DoneBB)
277 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
278 LoadCmpBB->addSuccessor(DoneBB);
279 LoadCmpBB->addSuccessor(StoreBB);
280
281 // .Lstore:
282 // stlxr wStatus, xNew, [xAddr]
283 // cbnz wStatus, .Lloadcmp
284 BuildMI(StoreBB, MIMD, TII->get(StlrOp), StatusReg)
285 .addReg(NewReg)
286 .addReg(AddrReg);
287 BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
288 .addReg(StatusReg, getKillRegState(StatusDead))
289 .addMBB(LoadCmpBB);
290 StoreBB->addSuccessor(LoadCmpBB);
291 StoreBB->addSuccessor(DoneBB);
292
  // Everything after the pseudo moves to DoneBB; MBB now falls into the loop.
293 DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
294 DoneBB->transferSuccessors(&MBB);
295
296 MBB.addSuccessor(LoadCmpBB);
297
298 NextMBBI = MBB.end();
299 MI.eraseFromParent();
300
301 // Recompute livein lists.
302 LivePhysRegs LiveRegs;
303 computeAndAddLiveIns(LiveRegs, *DoneBB);
304 computeAndAddLiveIns(LiveRegs, *StoreBB);
305 computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
306 // Do an extra pass around the loop to get loop carried registers right.
307 StoreBB->clearLiveIns();
308 computeAndAddLiveIns(LiveRegs, *StoreBB);
309 LoadCmpBB->clearLiveIns();
310 computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
311
312 return true;
313}
314
// Expand a 128-bit CMP_SWAP pseudo into an LDXP/STXP loop. The exclusive
// load/store flavour (acquire/release) is selected from the pseudo's
// opcode. A FailBB performs a store of the loaded value so that the
// exclusive monitor is cleared on the failure path as well.
// NOTE(review): this capture elided original lines 316 (rest of the
// signature), 356 (presumably the `MachineFunction *MF = ...` definition)
// and 383/391 (the CSINC condition-code operands).
315bool AArch64ExpandPseudo::expandCMP_SWAP_128(
317 MachineBasicBlock::iterator &NextMBBI) {
318 MachineInstr &MI = *MBBI;
319 MIMetadata MIMD(MI);
320 MachineOperand &DestLo = MI.getOperand(0);
321 MachineOperand &DestHi = MI.getOperand(1);
322 Register StatusReg = MI.getOperand(2).getReg();
323 bool StatusDead = MI.getOperand(2).isDead();
324 // Duplicating undef operands into 2 instructions does not guarantee the same
325 // value on both; However undef should be replaced by xzr anyway.
326 assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
327 Register AddrReg = MI.getOperand(3).getReg();
328 Register DesiredLoReg = MI.getOperand(4).getReg();
329 Register DesiredHiReg = MI.getOperand(5).getReg();
330 Register NewLoReg = MI.getOperand(6).getReg();
331 Register NewHiReg = MI.getOperand(7).getReg();
332
333 unsigned LdxpOp, StxpOp;
334
  // Memory-order variant -> exclusive pair opcodes.
335 switch (MI.getOpcode()) {
336 case AArch64::CMP_SWAP_128_MONOTONIC:
337 LdxpOp = AArch64::LDXPX;
338 StxpOp = AArch64::STXPX;
339 break;
340 case AArch64::CMP_SWAP_128_RELEASE:
341 LdxpOp = AArch64::LDXPX;
342 StxpOp = AArch64::STLXPX;
343 break;
344 case AArch64::CMP_SWAP_128_ACQUIRE:
345 LdxpOp = AArch64::LDAXPX;
346 StxpOp = AArch64::STXPX;
347 break;
348 case AArch64::CMP_SWAP_128:
349 LdxpOp = AArch64::LDAXPX;
350 StxpOp = AArch64::STLXPX;
351 break;
352 default:
353 llvm_unreachable("Unexpected opcode");
354 }
355
357 auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
358 auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
359 auto FailBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
360 auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
361
362 MF->insert(++MBB.getIterator(), LoadCmpBB);
363 MF->insert(++LoadCmpBB->getIterator(), StoreBB);
364 MF->insert(++StoreBB->getIterator(), FailBB);
365 MF->insert(++FailBB->getIterator(), DoneBB);
366
367 // .Lloadcmp:
368 // ldaxp xDestLo, xDestHi, [xAddr]
369 // cmp xDestLo, xDesiredLo
370 // sbcs xDestHi, xDesiredHi
371 // b.ne .Ldone
372 BuildMI(LoadCmpBB, MIMD, TII->get(LdxpOp))
373 .addReg(DestLo.getReg(), RegState::Define)
374 .addReg(DestHi.getReg(), RegState::Define)
375 .addReg(AddrReg);
  // Compare low halves, then fold the high-half comparison into StatusReg
  // via the CSINC pair below.
376 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
377 .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
378 .addReg(DesiredLoReg)
379 .addImm(0);
380 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
381 .addUse(AArch64::WZR)
382 .addUse(AArch64::WZR)
384 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
385 .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
386 .addReg(DesiredHiReg)
387 .addImm(0);
388 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
389 .addUse(StatusReg, RegState::Kill)
390 .addUse(StatusReg, RegState::Kill)
392 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CBNZW))
393 .addUse(StatusReg, getKillRegState(StatusDead))
394 .addMBB(FailBB);
395 LoadCmpBB->addSuccessor(FailBB);
396 LoadCmpBB->addSuccessor(StoreBB);
397
398 // .Lstore:
399 // stlxp wStatus, xNewLo, xNewHi, [xAddr]
400 // cbnz wStatus, .Lloadcmp
401 BuildMI(StoreBB, MIMD, TII->get(StxpOp), StatusReg)
402 .addReg(NewLoReg)
403 .addReg(NewHiReg)
404 .addReg(AddrReg);
405 BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
406 .addReg(StatusReg, getKillRegState(StatusDead))
407 .addMBB(LoadCmpBB);
408 BuildMI(StoreBB, MIMD, TII->get(AArch64::B)).addMBB(DoneBB);
409 StoreBB->addSuccessor(LoadCmpBB);
410 StoreBB->addSuccessor(DoneBB);
411
412 // .Lfail:
413 // stlxp wStatus, xDestLo, xDestHi, [xAddr]
414 // cbnz wStatus, .Lloadcmp
415 BuildMI(FailBB, MIMD, TII->get(StxpOp), StatusReg)
416 .addReg(DestLo.getReg())
417 .addReg(DestHi.getReg())
418 .addReg(AddrReg);
419 BuildMI(FailBB, MIMD, TII->get(AArch64::CBNZW))
420 .addReg(StatusReg, getKillRegState(StatusDead))
421 .addMBB(LoadCmpBB);
422 FailBB->addSuccessor(LoadCmpBB);
423 FailBB->addSuccessor(DoneBB);
424
425 DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
426 DoneBB->transferSuccessors(&MBB);
427
428 MBB.addSuccessor(LoadCmpBB);
429
430 NextMBBI = MBB.end();
431 MI.eraseFromParent();
432
433 // Recompute liveness bottom up.
434 LivePhysRegs LiveRegs;
435 computeAndAddLiveIns(LiveRegs, *DoneBB);
436 computeAndAddLiveIns(LiveRegs, *FailBB);
437 computeAndAddLiveIns(LiveRegs, *StoreBB);
438 computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
439
440 // Do an extra pass in the loop to get the loop carried dependencies right.
441 FailBB->clearLiveIns();
442 computeAndAddLiveIns(LiveRegs, *FailBB);
443 StoreBB->clearLiveIns();
444 computeAndAddLiveIns(LiveRegs, *StoreBB);
445 LoadCmpBB->clearLiveIns();
446 computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
447
448 return true;
449}
450
451/// \brief Expand Pseudos to Instructions with destructive operands.
452///
453/// This mechanism uses MOVPRFX instructions for zeroing the false lanes
454/// or for fixing relaxed register allocation conditions to comply with
455/// the instructions register constraints. The latter case may be cheaper
456/// than setting the register constraints in the register allocator,
457/// since that will insert regular MOV instructions rather than MOVPRFX.
458///
459/// Example (after register allocation):
460///
461/// FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
462///
463/// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
464/// * We cannot map directly to FSUB_ZPmZ_B because the register
465/// constraints of the instruction are not met.
466/// * Also the _ZERO specifies the false lanes need to be zeroed.
467///
468/// We first try to see if the destructive operand == result operand,
469/// if not, we try to swap the operands, e.g.
470///
471/// FSUB_ZPmZ_B Z0, Pg/m, Z0, Z1
472///
473/// But because FSUB_ZPmZ is not commutative, this is semantically
474/// different, so we need a reverse instruction:
475///
476/// FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1
477///
478/// Then we implement the zeroing of the false lanes of Z0 by adding
479/// a zeroing MOVPRFX instruction:
480///
481/// MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
482/// FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1
483///
484/// Note that this can only be done for _ZERO or _UNDEF variants where
485/// we can guarantee the false lanes to be zeroed (by implementing this)
486/// or that they are undef (don't care / not used), otherwise the
487/// swapping of operands is illegal because the operation is not
488/// (or cannot be emulated to be) fully commutative.
// See the large block comment immediately above for the overall strategy
// (MOVPRFX insertion, operand swapping via reverse instructions, zeroing
// of false lanes).
// NOTE(review): this capture elided the `case` labels of both DType
// switches and of the ElementSize switch (original lines 502-504, 512-513,
// 516, 519, 540, 543-544, 549-550, 553, 576-577, 582, 587, 592, 651-652,
// 657-660, 665), plus parts of two asserts; the statement bodies below are
// intact but their guarding labels are not visible.
489bool AArch64ExpandPseudo::expand_DestructiveOp(
  // Map the pseudo to its real SVE opcode and decode its TSFlags.
493 unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
494 uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
495 uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
496 bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
497 Register DstReg = MI.getOperand(0).getReg();
498 bool DstIsDead = MI.getOperand(0).isDead();
499 bool UseRev = false;
500 unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;
501
  // Pick the predicate/destructive/source operand indices for this
  // destructive-operand category, swapping to the reverse form when the
  // destination aliases a non-destructive source.
502 switch (DType) {
505 if (DstReg == MI.getOperand(3).getReg()) {
506 // FSUB Zd, Pg, Zs1, Zd ==> FSUBR Zd, Pg/m, Zd, Zs1
507 std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
508 UseRev = true;
509 break;
510 }
511 [[fallthrough]];
514 std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
515 break;
517 std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 3, 3);
518 break;
520 std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
521 if (DstReg == MI.getOperand(3).getReg()) {
522 // FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za
523 std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 3, 4, 2);
524 UseRev = true;
525 } else if (DstReg == MI.getOperand(4).getReg()) {
526 // FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za
527 std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 4, 3, 2);
528 UseRev = true;
529 }
530 break;
531 default:
532 llvm_unreachable("Unsupported Destructive Operand type");
533 }
534
535 // MOVPRFX can only be used if the destination operand
536 // is the destructive operand, not as any other operand,
537 // so the Destructive Operand must be unique.
538 bool DOPRegIsUnique = false;
539 switch (DType) {
541 DOPRegIsUnique = DstReg != MI.getOperand(SrcIdx).getReg();
542 break;
545 DOPRegIsUnique =
546 DstReg != MI.getOperand(DOPIdx).getReg() ||
547 MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
548 break;
551 DOPRegIsUnique = true;
552 break;
554 DOPRegIsUnique =
555 DstReg != MI.getOperand(DOPIdx).getReg() ||
556 (MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg() &&
557 MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg());
558 break;
559 }
560
561 // Resolve the reverse opcode
562 if (UseRev) {
563 int NewOpcode;
564 // e.g. DIV -> DIVR
565 if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
566 Opcode = NewOpcode;
567 // e.g. DIVR -> DIV
568 else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
569 Opcode = NewOpcode;
570 }
571
572 // Get the right MOVPRFX
573 uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
574 unsigned MovPrfx, LSLZero, MovPrfxZero;
575 switch (ElementSize) {
578 MovPrfx = AArch64::MOVPRFX_ZZ;
579 LSLZero = AArch64::LSL_ZPmI_B;
580 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
581 break;
583 MovPrfx = AArch64::MOVPRFX_ZZ;
584 LSLZero = AArch64::LSL_ZPmI_H;
585 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
586 break;
588 MovPrfx = AArch64::MOVPRFX_ZZ;
589 LSLZero = AArch64::LSL_ZPmI_S;
590 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
591 break;
593 MovPrfx = AArch64::MOVPRFX_ZZ;
594 LSLZero = AArch64::LSL_ZPmI_D;
595 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
596 break;
597 default:
598 llvm_unreachable("Unsupported ElementSize");
599 }
600
601 //
602 // Create the destructive operation (if required)
603 //
604 MachineInstrBuilder PRFX, DOP;
605 if (FalseZero) {
606 // If we cannot prefix the requested instruction we'll instead emit a
607 // prefixed_zeroing_mov for DestructiveBinary.
608 assert((DOPRegIsUnique || DType == AArch64::DestructiveBinary ||
611 "The destructive operand should be unique");
612 assert(ElementSize != AArch64::ElementSizeNone &&
613 "This instruction is unpredicated");
614
615 // Merge source operand into destination register
616 PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
617 .addReg(DstReg, RegState::Define)
618 .addReg(MI.getOperand(PredIdx).getReg())
619 .addReg(MI.getOperand(DOPIdx).getReg());
620
621 // After the movprfx, the destructive operand is same as Dst
622 DOPIdx = 0;
623
624 // Create the additional LSL to zero the lanes when the DstReg is not
625 // unique. Zeros the lanes in z0 that aren't active in p0 with sequence
626 // movprfx z0.b, p0/z, z0.b; lsl z0.b, p0/m, z0.b, #0;
627 if ((DType == AArch64::DestructiveBinary ||
630 !DOPRegIsUnique) {
631 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LSLZero))
632 .addReg(DstReg, RegState::Define)
633 .add(MI.getOperand(PredIdx))
634 .addReg(DstReg)
635 .addImm(0);
636 }
637 } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
638 assert(DOPRegIsUnique && "The destructive operand should be unique");
639 PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
640 .addReg(DstReg, RegState::Define)
641 .addReg(MI.getOperand(DOPIdx).getReg());
642 DOPIdx = 0;
643 }
644
645 //
646 // Create the destructive operation
647 //
648 DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
649 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));
650
  // Operand order differs per destructive-operand category.
651 switch (DType) {
653 DOP.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
654 .add(MI.getOperand(PredIdx))
655 .add(MI.getOperand(SrcIdx));
656 break;
661 DOP.add(MI.getOperand(PredIdx))
662 .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
663 .add(MI.getOperand(SrcIdx));
664 break;
666 DOP.add(MI.getOperand(PredIdx))
667 .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
668 .add(MI.getOperand(SrcIdx))
669 .add(MI.getOperand(Src2Idx));
670 break;
671 }
672
  // Implicit operands follow the prefix (if any) and the real operation.
673 if (PRFX) {
675 transferImpOps(MI, PRFX, DOP);
676 } else
677 transferImpOps(MI, DOP, DOP);
678
679 MI.eraseFromParent();
680 return true;
681}
682
// Expand an STGloop_wback/STZGloop_wback pseudo into an MTE tag-setting
// loop: an optional single STG/STZG to make the size a multiple of 32, then
// a loop of ST2G/STZ2G decrementing SizeReg by 32 per iteration.
// NOTE(review): this capture elided original lines 684 (rest of the
// signature), 691 (presumably the `MachineFunction *MF = ...` definition),
// 708 (the iterator `I` passed to expandMOVImm below), 724 (an operand of
// the ST2G builder) and 732 (the Bcc condition-code operand).
683bool AArch64ExpandPseudo::expandSetTagLoop(
685 MachineBasicBlock::iterator &NextMBBI) {
686 MachineInstr &MI = *MBBI;
687 DebugLoc DL = MI.getDebugLoc();
688 Register SizeReg = MI.getOperand(0).getReg();
689 Register AddressReg = MI.getOperand(1).getReg();
690
692
  // STZ* variants also zero the memory, not just the tags.
693 bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
694 const unsigned OpCode1 =
695 ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
696 const unsigned OpCode2 =
697 ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;
698
699 unsigned Size = MI.getOperand(2).getImm();
700 assert(Size > 0 && Size % 16 == 0);
  // Peel one 16-byte store if Size is an odd number of granules.
701 if (Size % (16 * 2) != 0) {
702 BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
703 .addReg(AddressReg)
704 .addReg(AddressReg)
705 .addImm(1);
706 Size -= 16;
707 }
  // Materialize the (now even) remaining size into SizeReg.
709 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
710 .addImm(Size);
711 expandMOVImm(MBB, I, 64);
712
713 auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
714 auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
715
716 MF->insert(++MBB.getIterator(), LoopBB);
717 MF->insert(++LoopBB->getIterator(), DoneBB);
718
719 BuildMI(LoopBB, DL, TII->get(OpCode2))
720 .addDef(AddressReg)
721 .addReg(AddressReg)
722 .addReg(AddressReg)
723 .addImm(2)
725 .setMIFlags(MI.getFlags())
726 BuildMI(LoopBB, DL, TII->get(AArch64::SUBSXri))
727 .addDef(SizeReg)
728 .addReg(SizeReg)
729 .addImm(16 * 2)
730 .addImm(0);
731 BuildMI(LoopBB, DL, TII->get(AArch64::Bcc))
733 .addMBB(LoopBB)
734 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
735
736 LoopBB->addSuccessor(LoopBB);
737 LoopBB->addSuccessor(DoneBB);
738
739 DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
740 DoneBB->transferSuccessors(&MBB);
741
742 MBB.addSuccessor(LoopBB);
743
744 NextMBBI = MBB.end();
745 MI.eraseFromParent();
746 // Recompute liveness bottom up.
747 LivePhysRegs LiveRegs;
748 computeAndAddLiveIns(LiveRegs, *DoneBB);
749 computeAndAddLiveIns(LiveRegs, *LoopBB);
750 // Do an extra pass in the loop to get the loop carried dependencies right.
751 // FIXME: is this necessary?
752 LoopBB->clearLiveIns();
753 computeAndAddLiveIns(LiveRegs, *LoopBB);
754 DoneBB->clearLiveIns();
755 computeAndAddLiveIns(LiveRegs, *DoneBB);
756
757 return true;
758}
759
// Expand a multi-register SVE spill/fill pseudo into N consecutive
// LDR_ZXI/STR_ZXI (Z registers) or LDR_PXI/STR_PXI (predicate registers),
// one per sub-register, at consecutive immediate offsets.
// NOTE(review): this capture elided original lines 761 (iterator
// parameter), 767 (the true arm of the RState ternary — presumably
// RegState::Define for loads) and 773 (the TRI initializer expression).
760bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
762 unsigned Opc, unsigned N) {
763 assert((Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI ||
764 Opc == AArch64::LDR_PXI || Opc == AArch64::STR_PXI) &&
765 "Unexpected opcode");
  // Loads define their sub-register operand; stores only read it.
766 unsigned RState = (Opc == AArch64::LDR_ZXI || Opc == AArch64::LDR_PXI)
768 : 0;
769 unsigned sub0 = (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI)
770 ? AArch64::zsub0
771 : AArch64::psub0;
772 const TargetRegisterInfo *TRI =
774 MachineInstr &MI = *MBBI;
775 for (unsigned Offset = 0; Offset < N; ++Offset) {
776 int ImmOffset = MI.getOperand(2).getImm() + Offset;
  // Only the final access may kill the base register.
777 bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
778 assert(ImmOffset >= -256 && ImmOffset < 256 &&
779 "Immediate spill offset out of range");
780 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
781 .addReg(TRI->getSubReg(MI.getOperand(0).getReg(), sub0 + Offset),
782 RState)
783 .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
784 .addImm(ImmOffset);
785 }
786 MI.eraseFromParent();
787 return true;
788}
789
790// Create a call with the passed opcode and explicit operands, copying over all
791// the implicit operands from *MBBI, starting at the regmask.
// NOTE(review): the declaration head (original lines 792-793, with the
// return type, function name and first parameters) was dropped by this
// capture; only the trailing parameters are visible.
794 const AArch64InstrInfo *TII,
795 unsigned Opcode,
796 ArrayRef<MachineOperand> ExplicitOps,
797 unsigned RegMaskStartIdx) {
798 // Build the MI, with explicit operands first (including the call target).
799 MachineInstr *Call = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(Opcode))
800 .add(ExplicitOps)
801 .getInstr();
802
803 // Register arguments are added during ISel, but cannot be added as explicit
804 // operands of the branch as it expects to be B <target> which is only one
805 // operand. Instead they are implicit operands used by the branch.
806 while (!MBBI->getOperand(RegMaskStartIdx).isRegMask()) {
807 const MachineOperand &MOP = MBBI->getOperand(RegMaskStartIdx);
808 assert(MOP.isReg() && "can only add register operands");
809 Call->addOperand(MachineOperand::CreateReg(
810 MOP.getReg(), /*Def=*/false, /*Implicit=*/true, /*isKill=*/false,
811 /*isDead=*/false, /*isUndef=*/MOP.isUndef()));
812 RegMaskStartIdx++;
813 }
  // Copy everything from the regmask onward verbatim (regmask + any
  // remaining implicit operands).
814 for (const MachineOperand &MO :
815 llvm::drop_begin(MBBI->operands(), RegMaskStartIdx))
816 Call->addOperand(MO);
817
818 return Call;
819}
820
821// Create a call to CallTarget, copying over all the operands from *MBBI,
822// starting at the regmask.
// NOTE(review): the declaration head (original lines 823-824) was dropped
// by this capture; only the trailing parameters are visible.
825 const AArch64InstrInfo *TII,
826 MachineOperand &CallTarget,
827 unsigned RegMaskStartIdx) {
  // Direct call (BL) for a global symbol, indirect (BLR) for a register.
828 unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
829
830 assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
831 "invalid operand for regular call");
832 return createCallWithOps(MBB, MBBI, TII, Opc, CallTarget, RegMaskStartIdx);
833}
834
// NOTE(review): original line 836 (rest of the signature) was dropped by
// this capture.
835bool AArch64ExpandPseudo::expandCALL_RVMARKER(
837 // Expand CALL_RVMARKER pseudo to:
838 // - a branch to the call target, followed by
839 // - the special `mov x29, x29` marker, and
840 // - another branch, to the runtime function
841 // Mark the sequence as bundle, to avoid passes moving other code in between.
842 MachineInstr &MI = *MBBI;
843 MachineOperand &RVTarget = MI.getOperand(0);
844 assert(RVTarget.isGlobal() && "invalid operand for attached call");
845
846 MachineInstr *OriginalCall = nullptr;
847
848 if (MI.getOpcode() == AArch64::BLRA_RVMARKER) {
849 // ptrauth call.
850 const MachineOperand &CallTarget = MI.getOperand(1);
851 const MachineOperand &Key = MI.getOperand(2);
852 const MachineOperand &IntDisc = MI.getOperand(3);
853 const MachineOperand &AddrDisc = MI.getOperand(4);
854
855 assert((Key.getImm() == AArch64PACKey::IA ||
856 Key.getImm() == AArch64PACKey::IB) &&
857 "Invalid auth call key");
858
859 MachineOperand Ops[] = {CallTarget, Key, IntDisc, AddrDisc};
860
861 OriginalCall = createCallWithOps(MBB, MBBI, TII, AArch64::BLRA, Ops,
862 /*RegMaskStartIdx=*/5);
863 } else {
864 assert(MI.getOpcode() == AArch64::BLR_RVMARKER && "unknown rvmarker MI");
865 OriginalCall = createCall(MBB, MBBI, TII, MI.getOperand(1),
866 // Regmask starts after the RV and call targets.
867 /*RegMaskStartIdx=*/2);
868 }
869
  // The marker: `mov x29, x29` encoded as ORRXrs fp, xzr, fp, #0.
870 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
871 .addReg(AArch64::FP, RegState::Define)
872 .addReg(AArch64::XZR)
873 .addReg(AArch64::FP)
874 .addImm(0);
875
  // The follow-up call to the retain/release runtime function.
876 auto *RVCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BL))
877 .add(RVTarget)
878 .getInstr();
879
880 if (MI.shouldUpdateCallSiteInfo())
881 MBB.getParent()->moveCallSiteInfo(&MI, OriginalCall);
882
883 MI.eraseFromParent();
  // Bundle call + marker + runtime call so nothing is scheduled in between.
884 finalizeBundle(MBB, OriginalCall->getIterator(),
885 std::next(RVCall->getIterator()));
886 return true;
887}
888
// NOTE(review): original line 890 (rest of the signature) was dropped by
// this capture.
889bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB,
891 // Expand CALL_BTI pseudo to:
892 // - a branch to the call target
893 // - a BTI instruction
894 // Mark the sequence as a bundle, to avoid passes moving other code in
895 // between.
896 MachineInstr &MI = *MBBI;
897 MachineInstr *Call = createCall(MBB, MBBI, TII, MI.getOperand(0),
898 // Regmask starts after the call target.
899 /*RegMaskStartIdx=*/1);
900
  // Propagate the KCFI type from the pseudo onto the real call.
901 Call->setCFIType(*MBB.getParent(), MI.getCFIType());
902
903 MachineInstr *BTI =
904 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT))
905 // BTI J so that setjmp can to BR to this.
906 .addImm(36)
907 .getInstr();
908
909 if (MI.shouldUpdateCallSiteInfo())
910 MBB.getParent()->moveCallSiteInfo(&MI, Call);
911
912 MI.eraseFromParent();
913 finalizeBundle(MBB, Call->getIterator(), std::next(BTI->getIterator()));
914 return true;
915}
916
// Expand StoreSwiftAsyncContext: a plain STRXui on non-arm64e targets, or
// an address-discriminated PACDB-signed store via x16/x17 on arm64e.
// NOTE(review): this capture elided original lines 918 (rest of the
// signature) and several builder-flag/terminator lines (929-931, 947, 952,
// 959, 963, 968, 970), so some BuildMI chains below are missing their
// trailing operands.
917bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
919 Register CtxReg = MBBI->getOperand(0).getReg();
920 Register BaseReg = MBBI->getOperand(1).getReg();
921 int Offset = MBBI->getOperand(2).getImm();
922 DebugLoc DL(MBBI->getDebugLoc());
923 auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();
924
  // No pointer authentication outside arm64e: emit an ordinary store.
925 if (STI.getTargetTriple().getArchName() != "arm64e") {
926 BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
927 .addUse(CtxReg)
928 .addUse(BaseReg)
929 .addImm(Offset / 8)
932 return true;
933 }
934
935 // We need to sign the context in an address-discriminated way. 0xc31a is a
936 // fixed random value, chosen as part of the ABI.
937 // add x16, xBase, #Offset
938 // movk x16, #0xc31a, lsl #48
939 // mov x17, x22/xzr
940 // pacdb x17, x16
941 // str x17, [xBase, #Offset]
942 unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
943 BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
944 .addUse(BaseReg)
945 .addImm(abs(Offset))
946 .addImm(0)
948 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
949 .addUse(AArch64::X16)
950 .addImm(0xc31a)
951 .addImm(48)
953 // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried), so
954 // move it somewhere before signing.
955 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
956 .addUse(AArch64::XZR)
957 .addUse(CtxReg)
958 .addImm(0)
960 BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
961 .addUse(AArch64::X17)
962 .addUse(AArch64::X16)
964 BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
965 .addUse(AArch64::X17)
966 .addUse(BaseReg)
967 .addImm(Offset / 8)
969
971 return true;
972}
973
// Expand RestoreZAPseudo into a conditional call: CBZX skips the restore
// call when the tested register (operand 0) is zero. The original block is
// split so the call lives in its own block (SMBB) with both paths joining
// at EndBB. Returns the block where expansion should continue.
// NOTE(review): the return-type line (original line 974) and lines 976/1005
// (rest of the signature; the MIB declaration used below) were dropped by
// this capture.
975AArch64ExpandPseudo::expandRestoreZA(MachineBasicBlock &MBB,
977 MachineInstr &MI = *MBBI;
978 assert((std::next(MBBI) != MBB.end() ||
979 MI.getParent()->successors().begin() !=
980 MI.getParent()->successors().end()) &&
981 "Unexpected unreachable in block that restores ZA");
982
983 // Compare TPIDR2_EL0 value against 0.
984 DebugLoc DL = MI.getDebugLoc();
985 MachineInstrBuilder Cbz = BuildMI(MBB, MBBI, DL, TII->get(AArch64::CBZX))
986 .add(MI.getOperand(0));
987
988 // Split MBB and create two new blocks:
989 // - MBB now contains all instructions before RestoreZAPseudo.
990 // - SMBB contains the RestoreZAPseudo instruction only.
991 // - EndBB contains all instructions after RestoreZAPseudo.
992 MachineInstr &PrevMI = *std::prev(MBBI);
993 MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
994 MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end()
995 ? *SMBB->successors().begin()
996 : SMBB->splitAt(MI, /*UpdateLiveIns*/ true);
997
998 // Add the SMBB label to the TB[N]Z instruction & create a branch to EndBB.
999 Cbz.addMBB(SMBB);
1000 BuildMI(&MBB, DL, TII->get(AArch64::B))
1001 .addMBB(EndBB);
1002 MBB.addSuccessor(EndBB);
1003
1004 // Replace the pseudo with a call (BL).
1006 BuildMI(*SMBB, SMBB->end(), DL, TII->get(AArch64::BL));
  // Operand 1 becomes an implicit use of the call; remaining operands are
  // copied through unchanged.
1007 MIB.addReg(MI.getOperand(1).getReg(), RegState::Implicit);
1008 for (unsigned I = 2; I < MI.getNumOperands(); ++I)
1009 MIB.add(MI.getOperand(I));
1010 BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
1011
1012 MI.eraseFromParent();
1013 return EndBB;
1014}
1015
 // Expand MSRpstatePseudo into a conditional SMSTART/SMSTOP: test pstate.sm
 // with a TB(N)Z and branch around the toggle when it already has the value
 // the callee expects. Returns the block that execution continues in.
 // NOTE(review): doxygen extraction — original lines 1016/1018 (the
 // `MachineBasicBlock *` return type and the iterator parameter) are missing
 // from this view. TODO confirm against the upstream file.
 1017AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB,
 1019 MachineInstr &MI = *MBBI;
 1020 // In the case of a smstart/smstop before a unreachable, just remove the pseudo.
 1021 // Exception handling code generated by Clang may introduce unreachables and it
 1022 // seems unnecessary to restore pstate.sm when that happens. Note that it is
 1023 // not just an optimisation, the code below expects a successor instruction/block
 1024 // in order to split the block at MBBI.
 1025 if (std::next(MBBI) == MBB.end() &&
 1026 MI.getParent()->successors().begin() ==
 1027 MI.getParent()->successors().end()) {
 1028 MI.eraseFromParent();
 1029 return &MBB;
 1030 }
 1031
 1032 // Expand the pseudo into smstart or smstop instruction. The pseudo has the
 1033 // following operands:
 1034 //
 1035 // MSRpstatePseudo <za|sm|both>, <0|1>, condition[, pstate.sm], <regmask>
 1036 //
 1037 // The pseudo is expanded into a conditional smstart/smstop, with a
 1038 // check if pstate.sm (register) equals the expected value, and if not,
 1039 // invokes the smstart/smstop.
 1040 //
 1041 // As an example, the following block contains a normal call from a
 1042 // streaming-compatible function:
 1043 //
 1044 // OrigBB:
 1045 // MSRpstatePseudo 3, 0, IfCallerIsStreaming, %0, <regmask> <- Cond SMSTOP
 1046 // bl @normal_callee
 1047 // MSRpstatePseudo 3, 1, IfCallerIsStreaming, %0, <regmask> <- Cond SMSTART
 1048 //
 1049 // ...which will be transformed into:
 1050 //
 1051 // OrigBB:
 1052 // TBNZx %0:gpr64, 0, SMBB
 1053 // b EndBB
 1054 //
 1055 // SMBB:
 1056 // MSRpstatesvcrImm1 3, 0, <regmask> <- SMSTOP
 1057 //
 1058 // EndBB:
 1059 // bl @normal_callee
 1060 // MSRcond_pstatesvcrImm1 3, 1, <regmask> <- SMSTART
 1061 //
 1062 DebugLoc DL = MI.getDebugLoc();
 1063
 1064 // Create the conditional branch based on the third operand of the
 1065 // instruction, which tells us if we are wrapping a normal or streaming
 1066 // function.
 1067 // We test the live value of pstate.sm and toggle pstate.sm if this is not the
 1068 // expected value for the callee (0 for a normal callee and 1 for a streaming
 1069 // callee).
 1070 unsigned Opc;
 1071 switch (MI.getOperand(2).getImm()) {
 1072 case AArch64SME::Always:
 1073 llvm_unreachable("Should have matched to instruction directly");
 // NOTE(review): the case labels at original lines 1074 and 1077 (the
 // AArch64SME conditions selecting TBNZW vs TBZW) are missing from this
 // view — verify which condition maps to which opcode upstream.
 1075 Opc = AArch64::TBNZW;
 1076 break;
 1078 Opc = AArch64::TBZW;
 1079 break;
 1080 }
 // Operand 3 carries the virtual/physical register holding pstate.sm; test
 // bit 0 of its 32-bit subregister.
 1081 auto PStateSM = MI.getOperand(3).getReg();
 // NOTE(review): original line 1082 (presumably the `TRI` definition) and
 // line 1084 (presumably `MachineInstrBuilder Tbx =`) are missing — `Tbx`
 // is used after the split below without a visible declaration.
 1083 unsigned SMReg32 = TRI->getSubReg(PStateSM, AArch64::sub_32);
 1085 BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(SMReg32).addImm(0);
 1086
 1087 // Split MBB and create two new blocks:
 1088 // - MBB now contains all instructions before MSRcond_pstatesvcrImm1.
 1089 // - SMBB contains the MSRcond_pstatesvcrImm1 instruction only.
 1090 // - EndBB contains all instructions after MSRcond_pstatesvcrImm1.
 1091 MachineInstr &PrevMI = *std::prev(MBBI);
 1092 MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
 // Reuse SMBB's lone successor when the pseudo is the last instruction, so
 // we do not create an empty EndBB.
 1093 MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end()
 1094 ? *SMBB->successors().begin()
 1095 : SMBB->splitAt(MI, /*UpdateLiveIns*/ true);
 1096
 1097 // Add the SMBB label to the TB[N]Z instruction & create a branch to EndBB.
 1098 Tbx.addMBB(SMBB);
 1099 BuildMI(&MBB, DL, TII->get(AArch64::B))
 1100 .addMBB(EndBB);
 1101 MBB.addSuccessor(EndBB);
 1102
 1103 // Create the SMSTART/SMSTOP (MSRpstatesvcrImm1) instruction in SMBB.
 1104 MachineInstrBuilder MIB = BuildMI(*SMBB, SMBB->begin(), MI.getDebugLoc(),
 1105 TII->get(AArch64::MSRpstatesvcrImm1));
 1106 // Copy all but the second and third operands of MSRcond_pstatesvcrImm1 (as
 1107 // these contain the CopyFromReg for the first argument and the flag to
 1108 // indicate whether the callee is streaming or normal).
 1109 MIB.add(MI.getOperand(0));
 1110 MIB.add(MI.getOperand(1));
 // Skip operands 2 (condition) and 3 (pstate.sm register); copy the rest,
 // including the register mask.
 1111 for (unsigned i = 4; i < MI.getNumOperands(); ++i)
 1112 MIB.add(MI.getOperand(i));
 1113
 1114 BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
 1115
 1116 MI.eraseFromParent();
 1117 return EndBB;
 1118}
1119
 // Expand an SME multi-vector load pseudo into either the contiguous or the
 // strided real opcode, chosen by which register class the allocated tuple
 // register landed in.
 // NOTE(review): doxygen extraction — original line 1121 (the MBB/MBBI
 // parameters of this function) is missing from this view.
1120bool AArch64ExpandPseudo::expandMultiVecPseudo(
 1122 TargetRegisterClass ContiguousClass, TargetRegisterClass StridedClass,
 1123 unsigned ContiguousOpc, unsigned StridedOpc) {
 1124 MachineInstr &MI = *MBBI;
 1125 Register Tuple = MI.getOperand(0).getReg();
 1126
 1127 auto ContiguousRange = ContiguousClass.getRegisters();
 1128 auto StridedRange = StridedClass.getRegisters();
 1129 unsigned Opc;
 // Membership of the destination tuple register in one of the two classes
 // decides the real opcode; anything else is a register-allocation bug.
 1130 if (llvm::is_contained(ContiguousRange, Tuple.asMCReg())) {
 1131 Opc = ContiguousOpc;
 1132 } else if (llvm::is_contained(StridedRange, Tuple.asMCReg())) {
 1133 Opc = StridedOpc;
 1134 } else
 1135 llvm_unreachable("Cannot expand Multi-Vector pseudo");
 1136
 // Rebuild the instruction with the selected opcode, copying the four
 // explicit operands (dest tuple, predicate, base, offset) verbatim.
 1137 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
 1138 .add(MI.getOperand(0))
 1139 .add(MI.getOperand(1))
 1140 .add(MI.getOperand(2))
 1141 .add(MI.getOperand(3));
 1142 transferImpOps(MI, MIB, MIB);
 1143 MI.eraseFromParent();
 1144 return true;
 1145}
1146
1147/// If MBBI references a pseudo instruction that should be expanded here,
1148/// do the expansion and return true. Otherwise return false.
 // Central pseudo-expansion dispatcher: recognizes one pseudo at MBBI and
 // rewrites it into real target instructions, returning true on expansion.
 // NOTE(review): doxygen extraction — gaps in the embedded source-line
 // numbering throughout this function (e.g. 1150, 1160, 1205, 1287, 1296,
 // 1322-1324, 1341-1346) indicate original lines missing from this view;
 // verify any edit against the upstream file.
1149bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
 1151 MachineBasicBlock::iterator &NextMBBI) {
 1152 MachineInstr &MI = *MBBI;
 1153 unsigned Opcode = MI.getOpcode();
 1154
 1155 // Check if we can expand the destructive op
 1156 int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
 1157 if (OrigInstr != -1) {
 1158 auto &Orig = TII->get(OrigInstr);
 // NOTE(review): original line 1160 (the right-hand side of this
 // comparison, presumably `AArch64::NotDestructive) {`) is missing here.
 1159 if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask) !=
 1161 return expand_DestructiveOp(MI, MBB, MBBI);
 1162 }
 1163 }
 1164
 1165 switch (Opcode) {
 1166 default:
 1167 break;
 1168
 // BSP (bitwise select pseudo): pick BIT/BIF/BSL depending on which source
 // operand the register allocator tied to the destination, inserting an ORR
 // copy first when none of them is.
 1169 case AArch64::BSPv8i8:
 1170 case AArch64::BSPv16i8: {
 1171 Register DstReg = MI.getOperand(0).getReg();
 1172 if (DstReg == MI.getOperand(3).getReg()) {
 1173 // Expand to BIT
 1174 BuildMI(MBB, MBBI, MI.getDebugLoc(),
 1175 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
 1176 : AArch64::BITv16i8))
 1177 .add(MI.getOperand(0))
 1178 .add(MI.getOperand(3))
 1179 .add(MI.getOperand(2))
 1180 .add(MI.getOperand(1));
 1181 } else if (DstReg == MI.getOperand(2).getReg()) {
 1182 // Expand to BIF
 1183 BuildMI(MBB, MBBI, MI.getDebugLoc(),
 1184 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
 1185 : AArch64::BIFv16i8))
 1186 .add(MI.getOperand(0))
 1187 .add(MI.getOperand(2))
 1188 .add(MI.getOperand(3))
 1189 .add(MI.getOperand(1));
 1190 } else {
 1191 // Expand to BSL, use additional move if required
 1192 if (DstReg == MI.getOperand(1).getReg()) {
 1193 BuildMI(MBB, MBBI, MI.getDebugLoc(),
 1194 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
 1195 : AArch64::BSLv16i8))
 1196 .add(MI.getOperand(0))
 1197 .add(MI.getOperand(1))
 1198 .add(MI.getOperand(2))
 1199 .add(MI.getOperand(3));
 1200 } else {
 // Copy the mask operand into the destination with an ORR, then BSL.
 // NOTE(review): original lines 1205/1214 (additional register flags in
 // these addReg calls) are missing from this view.
 1201 BuildMI(MBB, MBBI, MI.getDebugLoc(),
 1202 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
 1203 : AArch64::ORRv16i8))
 1204 .addReg(DstReg,
 1206 getRenamableRegState(MI.getOperand(0).isRenamable()))
 1207 .add(MI.getOperand(1))
 1208 .add(MI.getOperand(1));
 1209 BuildMI(MBB, MBBI, MI.getDebugLoc(),
 1210 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
 1211 : AArch64::BSLv16i8))
 1212 .add(MI.getOperand(0))
 1213 .addReg(DstReg,
 1215 getRenamableRegState(MI.getOperand(0).isRenamable()))
 1216 .add(MI.getOperand(2))
 1217 .add(MI.getOperand(3));
 1218 }
 1219 }
 1220 MI.eraseFromParent();
 1221 return true;
 1222 }
 1223
 // Reg-reg arithmetic/logical pseudos: rewrite to the shifted-register form
 // of the same operation (the "rs" opcodes) so late passes see the real
 // instruction encoding.
 1224 case AArch64::ADDWrr:
 1225 case AArch64::SUBWrr:
 1226 case AArch64::ADDXrr:
 1227 case AArch64::SUBXrr:
 1228 case AArch64::ADDSWrr:
 1229 case AArch64::SUBSWrr:
 1230 case AArch64::ADDSXrr:
 1231 case AArch64::SUBSXrr:
 1232 case AArch64::ANDWrr:
 1233 case AArch64::ANDXrr:
 1234 case AArch64::BICWrr:
 1235 case AArch64::BICXrr:
 1236 case AArch64::ANDSWrr:
 1237 case AArch64::ANDSXrr:
 1238 case AArch64::BICSWrr:
 1239 case AArch64::BICSXrr:
 1240 case AArch64::EONWrr:
 1241 case AArch64::EONXrr:
 1242 case AArch64::EORWrr:
 1243 case AArch64::EORXrr:
 1244 case AArch64::ORNWrr:
 1245 case AArch64::ORNXrr:
 1246 case AArch64::ORRWrr:
 1247 case AArch64::ORRXrr: {
 1248 unsigned Opcode;
 1249 switch (MI.getOpcode()) {
 1250 default:
 1251 return false;
 1252 case AArch64::ADDWrr: Opcode = AArch64::ADDWrs; break;
 1253 case AArch64::SUBWrr: Opcode = AArch64::SUBWrs; break;
 1254 case AArch64::ADDXrr: Opcode = AArch64::ADDXrs; break;
 1255 case AArch64::SUBXrr: Opcode = AArch64::SUBXrs; break;
 1256 case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break;
 1257 case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break;
 1258 case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break;
 1259 case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break;
 1260 case AArch64::ANDWrr: Opcode = AArch64::ANDWrs; break;
 1261 case AArch64::ANDXrr: Opcode = AArch64::ANDXrs; break;
 1262 case AArch64::BICWrr: Opcode = AArch64::BICWrs; break;
 1263 case AArch64::BICXrr: Opcode = AArch64::BICXrs; break;
 1264 case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break;
 1265 case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break;
 1266 case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break;
 1267 case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break;
 1268 case AArch64::EONWrr: Opcode = AArch64::EONWrs; break;
 1269 case AArch64::EONXrr: Opcode = AArch64::EONXrs; break;
 1270 case AArch64::EORWrr: Opcode = AArch64::EORWrs; break;
 1271 case AArch64::EORXrr: Opcode = AArch64::EORXrs; break;
 1272 case AArch64::ORNWrr: Opcode = AArch64::ORNWrs; break;
 1273 case AArch64::ORNXrr: Opcode = AArch64::ORNXrs; break;
 1274 case AArch64::ORRWrr: Opcode = AArch64::ORRWrs; break;
 1275 case AArch64::ORRXrr: Opcode = AArch64::ORRXrs; break;
 1276 }
 1277 MachineFunction &MF = *MBB.getParent();
 1278 // Try to create new inst without implicit operands added.
 1279 MachineInstr *NewMI = MF.CreateMachineInstr(
 1280 TII->get(Opcode), MI.getDebugLoc(), /*NoImplicit=*/true);
 1281 MBB.insert(MBBI, NewMI);
 1282 MachineInstrBuilder MIB1(MF, NewMI);
 1283 MIB1->setPCSections(MF, MI.getPCSections());
 // NOTE(review): original line 1287 (presumably the shift-amount
 // `.addImm(...)` terminating this builder chain) is missing here.
 1284 MIB1.addReg(MI.getOperand(0).getReg(), RegState::Define)
 1285 .add(MI.getOperand(1))
 1286 .add(MI.getOperand(2))
 1288 transferImpOps(MI, MIB1, MIB1);
 // Preserve the debug instruction number so debug value tracking still
 // refers to the replacement instruction.
 1289 if (auto DebugNumber = MI.peekDebugInstrNum())
 1290 NewMI->setDebugInstrNum(DebugNumber);
 1291 MI.eraseFromParent();
 1292 return true;
 1293 }
 1294
 // LOADgot: GOT-relative load of a global/symbol/constpool address.
 // Tiny code model: a single literal LDR. Otherwise: ADRP + LDR pair.
 1295 case AArch64::LOADgot: {
 // NOTE(review): original line 1296 (presumably the definition of `MF`
 // used below) is missing from this view.
 1297 Register DstReg = MI.getOperand(0).getReg();
 1298 const MachineOperand &MO1 = MI.getOperand(1);
 1299 unsigned Flags = MO1.getTargetFlags();
 1300
 1301 if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
 1302 // Tiny codemodel expand to LDR
 1303 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
 1304 TII->get(AArch64::LDRXl), DstReg);
 1305
 1306 if (MO1.isGlobal()) {
 1307 MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
 1308 } else if (MO1.isSymbol()) {
 1309 MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
 1310 } else {
 1311 assert(MO1.isCPI() &&
 1312 "Only expect globals, externalsymbols, or constant pools");
 1313 MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
 1314 }
 1315 } else {
 1316 // Small codemodel expand into ADRP + LDR.
 1317 MachineFunction &MF = *MI.getParent()->getParent();
 1318 DebugLoc DL = MI.getDebugLoc();
 1319 MachineInstrBuilder MIB1 =
 1320 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
 1321
 // NOTE(review): original lines 1322-1324 are missing — they presumably
 // declare `MIB2`/`TRI` and open the `if` whose `else` appears below
 // (likely a 32-bit/ILP32 pointer-width check). Verify upstream.
 1325 unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
 1326 unsigned DstFlags = MI.getOperand(0).getTargetFlags();
 1327 MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
 1328 .addDef(Reg32)
 1329 .addReg(DstReg, RegState::Kill)
 1330 .addReg(DstReg, DstFlags | RegState::Implicit);
 1331 } else {
 1332 Register DstReg = MI.getOperand(0).getReg();
 1333 MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
 1334 .add(MI.getOperand(0))
 1335 .addUse(DstReg, RegState::Kill);
 1336 }
 1337
 // Attach the page/pageoff relocations to the ADRP (MIB1) and the LDR
 // (MIB2) respectively. NOTE(review): continuation lines 1341/1343/1345-46
 // carrying the MO_PAGEOFF|MO_NC flag arguments are missing here.
 1338 if (MO1.isGlobal()) {
 1339 MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
 1340 MIB2.addGlobalAddress(MO1.getGlobal(), 0,
 1342 } else if (MO1.isSymbol()) {
 1344 MIB2.addExternalSymbol(MO1.getSymbolName(), Flags |
 1347 } else {
 1348 assert(MO1.isCPI() &&
 1349 "Only expect globals, externalsymbols, or constant pools");
 1350 MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
 1351 Flags | AArch64II::MO_PAGE);
 1352 MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
 1353 Flags | AArch64II::MO_PAGEOFF |
 1355 }
 1356
 1357 transferImpOps(MI, MIB1, MIB2);
 1358 }
 1359 MI.eraseFromParent();
 1360 return true;
 1361 }
 // MOVaddrBA: materialize a blockaddress via a constant pool entry, since
 // ADRP addends are limited to 8MB (see comment below).
 1362 case AArch64::MOVaddrBA: {
 1363 MachineFunction &MF = *MI.getParent()->getParent();
 // NOTE(review): original line 1364 is missing here (presumably an
 // `if` guarding this path, e.g. a MachO subtarget check, matching the
 // closing brace and [[fallthrough]] below).
 1365 // blockaddress expressions have to come from a constant pool because the
 1366 // largest addend (and hence offset within a function) allowed for ADRP is
 1367 // only 8MB.
 1368 const BlockAddress *BA = MI.getOperand(1).getBlockAddress();
 1369 assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset");
 1370
 // NOTE(review): original line 1371 (presumably the `MCP` definition) is
 // missing from this view.
 1372 unsigned CPIdx = MCP->getConstantPoolIndex(BA, Align(8));
 1373
 1374 Register DstReg = MI.getOperand(0).getReg();
 1375 auto MIB1 =
 1376 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
 1378 auto MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
 1379 TII->get(AArch64::LDRXui), DstReg)
 1380 .addUse(DstReg)
 1383 transferImpOps(MI, MIB1, MIB2);
 1384 MI.eraseFromParent();
 1385 return true;
 1386 }
 1387 }
 1388 [[fallthrough]];
 // MOVaddr family: materialize an address as ADRP (page) + ADDXri
 // (page offset), with an extra MOVK for tagged globals.
 1389 case AArch64::MOVaddr:
 1390 case AArch64::MOVaddrJT:
 1391 case AArch64::MOVaddrCP:
 1392 case AArch64::MOVaddrTLS:
 1393 case AArch64::MOVaddrEXT: {
 1394 // Expand into ADRP + ADD.
 1395 Register DstReg = MI.getOperand(0).getReg();
 1396 assert(DstReg != AArch64::XZR);
 1397 MachineInstrBuilder MIB1 =
 1398 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
 1399 .add(MI.getOperand(1));
 1400
 1401 if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
 1402 // MO_TAGGED on the page indicates a tagged address. Set the tag now.
 1403 // We do so by creating a MOVK that sets bits 48-63 of the register to
 1404 // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
 1405 // the small code model so we can assume a binary size of <= 4GB, which
 1406 // makes the untagged PC relative offset positive. The binary must also be
 1407 // loaded into address range [0, 2^48). Both of these properties need to
 1408 // be ensured at runtime when using tagged addresses.
 1409 auto Tag = MI.getOperand(1);
 1410 Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
 1411 Tag.setOffset(0x100000000);
 1412 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
 1413 .addReg(DstReg)
 1414 .add(Tag)
 1415 .addImm(48);
 1416 }
 1417
 1418 MachineInstrBuilder MIB2 =
 1419 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
 1420 .add(MI.getOperand(0))
 1421 .addReg(DstReg)
 1422 .add(MI.getOperand(2))
 1423 .addImm(0);
 1424
 1425 transferImpOps(MI, MIB1, MIB2);
 1426 MI.eraseFromParent();
 1427 return true;
 1428 }
 1429 case AArch64::ADDlowTLS:
 1430 // Produce a plain ADD
 1431 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
 1432 .add(MI.getOperand(0))
 1433 .add(MI.getOperand(1))
 1434 .add(MI.getOperand(2))
 1435 .addImm(0);
 1436 MI.eraseFromParent();
 1437 return true;
 1438
 // MOVbaseTLS: read the thread pointer from the subtarget-selected
 // TPIDR* system register via MRS.
 1439 case AArch64::MOVbaseTLS: {
 1440 Register DstReg = MI.getOperand(0).getReg();
 1441 auto SysReg = AArch64SysReg::TPIDR_EL0;
 // NOTE(review): original line 1442 (presumably the `MF` definition used
 // below) is missing from this view.
 1443 if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
 1444 SysReg = AArch64SysReg::TPIDR_EL3;
 1445 else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
 1446 SysReg = AArch64SysReg::TPIDR_EL2;
 1447 else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
 1448 SysReg = AArch64SysReg::TPIDR_EL1;
 1449 else if (MF->getSubtarget<AArch64Subtarget>().useROEL0ForTP())
 1450 SysReg = AArch64SysReg::TPIDRRO_EL0;
 1451 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
 1452 .addImm(SysReg);
 1453 MI.eraseFromParent();
 1454 return true;
 1455 }
 1456
 1457 case AArch64::MOVi32imm:
 1458 return expandMOVImm(MBB, MBBI, 32);
 1459 case AArch64::MOVi64imm:
 1460 return expandMOVImm(MBB, MBBI, 64);
 1461 case AArch64::RET_ReallyLR: {
 1462 // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
 1463 // function and missing live-ins. We are fine in practice because callee
 1464 // saved register handling ensures the register value is restored before
 1465 // RET, but we need the undef flag here to appease the MachineVerifier
 1466 // liveness checks.
 // NOTE(review): original line 1467 (presumably `MachineInstrBuilder MIB =`)
 // is missing from this view.
 1468 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
 1469 .addReg(AArch64::LR, RegState::Undef);
 1470 transferImpOps(MI, MIB, MIB);
 1471 MI.eraseFromParent();
 1472 return true;
 1473 }
 // Compare-and-swap pseudos: delegate to the LL/SC loop expanders with the
 // width-appropriate load-acquire/store-release-exclusive and compare
 // opcodes. NOTE(review): the extend/shift immediate argument lines
 // (1477/1482/1487/1492) are missing from this view.
 1474 case AArch64::CMP_SWAP_8:
 1475 return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
 1476 AArch64::SUBSWrx,
 1478 AArch64::WZR, NextMBBI);
 1479 case AArch64::CMP_SWAP_16:
 1480 return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
 1481 AArch64::SUBSWrx,
 1483 AArch64::WZR, NextMBBI);
 1484 case AArch64::CMP_SWAP_32:
 1485 return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
 1486 AArch64::SUBSWrs,
 1488 AArch64::WZR, NextMBBI);
 1489 case AArch64::CMP_SWAP_64:
 1490 return expandCMP_SWAP(MBB, MBBI,
 1491 AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
 1493 AArch64::XZR, NextMBBI);
 1494 case AArch64::CMP_SWAP_128:
 1495 case AArch64::CMP_SWAP_128_RELEASE:
 1496 case AArch64::CMP_SWAP_128_ACQUIRE:
 1497 case AArch64::CMP_SWAP_128_MONOTONIC:
 1498 return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);
 1499
 1500 case AArch64::AESMCrrTied:
 1501 case AArch64::AESIMCrrTied: {
 // NOTE(review): original line 1502 (presumably `MachineInstrBuilder MIB =`)
 // is missing from this view.
 1503 BuildMI(MBB, MBBI, MI.getDebugLoc(),
 1504 TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr :
 1505 AArch64::AESIMCrr))
 1506 .add(MI.getOperand(0))
 1507 .add(MI.getOperand(1));
 1508 transferImpOps(MI, MIB, MIB);
 1509 MI.eraseFromParent();
 1510 return true;
 1511 }
 // IRGstack (memory tagging): compute the tagged base pointer from the
 // frame reference, materializing the offset first when it is non-zero.
 1512 case AArch64::IRGstack: {
 1513 MachineFunction &MF = *MBB.getParent();
 // NOTE(review): original line 1514 (presumably the `AFI` definition used
 // below) is missing from this view.
 1515 const AArch64FrameLowering *TFI =
 1516 MF.getSubtarget<AArch64Subtarget>().getFrameLowering();
 1517
 1518 // IRG does not allow immediate offset. getTaggedBasePointerOffset should
 1519 // almost always point to SP-after-prologue; if not, emit a longer
 1520 // instruction sequence.
 1521 int BaseOffset = -AFI->getTaggedBasePointerOffset();
 1522 Register FrameReg;
 1523 StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
 1524 MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
 1525 /*PreferFP=*/false,
 1526 /*ForSimm=*/true);
 1527 Register SrcReg = FrameReg;
 1528 if (FrameRegOffset) {
 1529 // Use output register as temporary.
 1530 SrcReg = MI.getOperand(0).getReg();
 1531 emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
 1532 FrameRegOffset, TII);
 1533 }
 1534 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
 1535 .add(MI.getOperand(0))
 1536 .addUse(SrcReg)
 1537 .add(MI.getOperand(2));
 1538 MI.eraseFromParent();
 1539 return true;
 1540 }
 // TAGPstack: ADDG/SUBG with the magnitude of the immediate, sign choosing
 // the opcode.
 1541 case AArch64::TAGPstack: {
 1542 int64_t Offset = MI.getOperand(2).getImm();
 1543 BuildMI(MBB, MBBI, MI.getDebugLoc(),
 1544 TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
 1545 .add(MI.getOperand(0))
 1546 .add(MI.getOperand(1))
 1547 .addImm(std::abs(Offset))
 1548 .add(MI.getOperand(4));
 1549 MI.eraseFromParent();
 1550 return true;
 1551 }
 1552 case AArch64::STGloop_wback:
 1553 case AArch64::STZGloop_wback:
 1554 return expandSetTagLoop(MBB, MBBI, NextMBBI);
 1555 case AArch64::STGloop:
 1556 case AArch64::STZGloop:
 // NOTE(review): original line 1557 (presumably `report_fatal_error(` or
 // `llvm_unreachable(`) is missing before this message.
 1558 "Non-writeback variants of STGloop / STZGloop should not "
 1559 "survive past PrologEpilogInserter.");
 // SVE register-tuple spills/fills: split into N consecutive single-vector
 // (or predicate-pair) loads/stores.
 1560 case AArch64::STR_ZZZZXI:
 1561 return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
 1562 case AArch64::STR_ZZZXI:
 1563 return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
 1564 case AArch64::STR_ZZXI:
 1565 return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
 1566 case AArch64::STR_PPXI:
 1567 return expandSVESpillFill(MBB, MBBI, AArch64::STR_PXI, 2);
 1568 case AArch64::LDR_ZZZZXI:
 1569 return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
 1570 case AArch64::LDR_ZZZXI:
 1571 return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
 1572 case AArch64::LDR_ZZXI:
 1573 return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
 1574 case AArch64::LDR_PPXI:
 1575 return expandSVESpillFill(MBB, MBBI, AArch64::LDR_PXI, 2);
 1576 case AArch64::BLR_RVMARKER:
 1577 case AArch64::BLRA_RVMARKER:
 1578 return expandCALL_RVMARKER(MBB, MBBI);
 1579 case AArch64::BLR_BTI:
 1580 return expandCALL_BTI(MBB, MBBI);
 1581 case AArch64::StoreSwiftAsyncContext:
 1582 return expandStoreSwiftAsyncContext(MBB, MBBI);
 // Block-splitting expansions: if the helper created new blocks, NextMBBI
 // no longer points into MBB, so invalidate it for the caller's loop.
 1583 case AArch64::RestoreZAPseudo: {
 1584 auto *NewMBB = expandRestoreZA(MBB, MBBI);
 1585 if (NewMBB != &MBB)
 1586 NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
 1587 return true;
 1588 }
 1589 case AArch64::MSRpstatePseudo: {
 1590 auto *NewMBB = expandCondSMToggle(MBB, MBBI);
 1591 if (NewMBB != &MBB)
 1592 NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
 1593 return true;
 1594 }
 // Coalescer barriers only exist to constrain the register coalescer;
 // simply delete them here.
 1595 case AArch64::COALESCER_BARRIER_FPR16:
 1596 case AArch64::COALESCER_BARRIER_FPR32:
 1597 case AArch64::COALESCER_BARRIER_FPR64:
 1598 case AArch64::COALESCER_BARRIER_FPR128:
 1599 MI.eraseFromParent();
 1600 return true;
 // SME multi-vector load pseudos (2- and 4-register, immediate and
 // register offset, temporal and non-temporal): select contiguous vs
 // strided real opcode via expandMultiVecPseudo.
 1601 case AArch64::LD1B_2Z_IMM_PSEUDO:
 1602 return expandMultiVecPseudo(
 1603 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
 1604 AArch64::LD1B_2Z_IMM, AArch64::LD1B_2Z_STRIDED_IMM);
 1605 case AArch64::LD1H_2Z_IMM_PSEUDO:
 1606 return expandMultiVecPseudo(
 1607 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
 1608 AArch64::LD1H_2Z_IMM, AArch64::LD1H_2Z_STRIDED_IMM);
 1609 case AArch64::LD1W_2Z_IMM_PSEUDO:
 1610 return expandMultiVecPseudo(
 1611 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
 1612 AArch64::LD1W_2Z_IMM, AArch64::LD1W_2Z_STRIDED_IMM);
 1613 case AArch64::LD1D_2Z_IMM_PSEUDO:
 1614 return expandMultiVecPseudo(
 1615 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
 1616 AArch64::LD1D_2Z_IMM, AArch64::LD1D_2Z_STRIDED_IMM);
 1617 case AArch64::LDNT1B_2Z_IMM_PSEUDO:
 1618 return expandMultiVecPseudo(
 1619 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
 1620 AArch64::LDNT1B_2Z_IMM, AArch64::LDNT1B_2Z_STRIDED_IMM);
 1621 case AArch64::LDNT1H_2Z_IMM_PSEUDO:
 1622 return expandMultiVecPseudo(
 1623 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
 1624 AArch64::LDNT1H_2Z_IMM, AArch64::LDNT1H_2Z_STRIDED_IMM);
 1625 case AArch64::LDNT1W_2Z_IMM_PSEUDO:
 1626 return expandMultiVecPseudo(
 1627 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
 1628 AArch64::LDNT1W_2Z_IMM, AArch64::LDNT1W_2Z_STRIDED_IMM);
 1629 case AArch64::LDNT1D_2Z_IMM_PSEUDO:
 1630 return expandMultiVecPseudo(
 1631 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
 1632 AArch64::LDNT1D_2Z_IMM, AArch64::LDNT1D_2Z_STRIDED_IMM);
 1633 case AArch64::LD1B_2Z_PSEUDO:
 1634 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
 1635 AArch64::ZPR2StridedRegClass, AArch64::LD1B_2Z,
 1636 AArch64::LD1B_2Z_STRIDED);
 1637 case AArch64::LD1H_2Z_PSEUDO:
 1638 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
 1639 AArch64::ZPR2StridedRegClass, AArch64::LD1H_2Z,
 1640 AArch64::LD1H_2Z_STRIDED);
 1641 case AArch64::LD1W_2Z_PSEUDO:
 1642 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
 1643 AArch64::ZPR2StridedRegClass, AArch64::LD1W_2Z,
 1644 AArch64::LD1W_2Z_STRIDED);
 1645 case AArch64::LD1D_2Z_PSEUDO:
 1646 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
 1647 AArch64::ZPR2StridedRegClass, AArch64::LD1D_2Z,
 1648 AArch64::LD1D_2Z_STRIDED);
 1649 case AArch64::LDNT1B_2Z_PSEUDO:
 1650 return expandMultiVecPseudo(
 1651 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
 1652 AArch64::LDNT1B_2Z, AArch64::LDNT1B_2Z_STRIDED);
 1653 case AArch64::LDNT1H_2Z_PSEUDO:
 1654 return expandMultiVecPseudo(
 1655 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
 1656 AArch64::LDNT1H_2Z, AArch64::LDNT1H_2Z_STRIDED);
 1657 case AArch64::LDNT1W_2Z_PSEUDO:
 1658 return expandMultiVecPseudo(
 1659 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
 1660 AArch64::LDNT1W_2Z, AArch64::LDNT1W_2Z_STRIDED);
 1661 case AArch64::LDNT1D_2Z_PSEUDO:
 1662 return expandMultiVecPseudo(
 1663 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
 1664 AArch64::LDNT1D_2Z, AArch64::LDNT1D_2Z_STRIDED);
 1665 case AArch64::LD1B_4Z_IMM_PSEUDO:
 1666 return expandMultiVecPseudo(
 1667 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
 1668 AArch64::LD1B_4Z_IMM, AArch64::LD1B_4Z_STRIDED_IMM);
 1669 case AArch64::LD1H_4Z_IMM_PSEUDO:
 1670 return expandMultiVecPseudo(
 1671 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
 1672 AArch64::LD1H_4Z_IMM, AArch64::LD1H_4Z_STRIDED_IMM);
 1673 case AArch64::LD1W_4Z_IMM_PSEUDO:
 1674 return expandMultiVecPseudo(
 1675 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
 1676 AArch64::LD1W_4Z_IMM, AArch64::LD1W_4Z_STRIDED_IMM);
 1677 case AArch64::LD1D_4Z_IMM_PSEUDO:
 1678 return expandMultiVecPseudo(
 1679 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
 1680 AArch64::LD1D_4Z_IMM, AArch64::LD1D_4Z_STRIDED_IMM);
 1681 case AArch64::LDNT1B_4Z_IMM_PSEUDO:
 1682 return expandMultiVecPseudo(
 1683 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
 1684 AArch64::LDNT1B_4Z_IMM, AArch64::LDNT1B_4Z_STRIDED_IMM);
 1685 case AArch64::LDNT1H_4Z_IMM_PSEUDO:
 1686 return expandMultiVecPseudo(
 1687 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
 1688 AArch64::LDNT1H_4Z_IMM, AArch64::LDNT1H_4Z_STRIDED_IMM);
 1689 case AArch64::LDNT1W_4Z_IMM_PSEUDO:
 1690 return expandMultiVecPseudo(
 1691 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
 1692 AArch64::LDNT1W_4Z_IMM, AArch64::LDNT1W_4Z_STRIDED_IMM);
 1693 case AArch64::LDNT1D_4Z_IMM_PSEUDO:
 1694 return expandMultiVecPseudo(
 1695 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
 1696 AArch64::LDNT1D_4Z_IMM, AArch64::LDNT1D_4Z_STRIDED_IMM);
 1697 case AArch64::LD1B_4Z_PSEUDO:
 1698 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
 1699 AArch64::ZPR4StridedRegClass, AArch64::LD1B_4Z,
 1700 AArch64::LD1B_4Z_STRIDED);
 1701 case AArch64::LD1H_4Z_PSEUDO:
 1702 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
 1703 AArch64::ZPR4StridedRegClass, AArch64::LD1H_4Z,
 1704 AArch64::LD1H_4Z_STRIDED);
 1705 case AArch64::LD1W_4Z_PSEUDO:
 1706 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
 1707 AArch64::ZPR4StridedRegClass, AArch64::LD1W_4Z,
 1708 AArch64::LD1W_4Z_STRIDED);
 1709 case AArch64::LD1D_4Z_PSEUDO:
 1710 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
 1711 AArch64::ZPR4StridedRegClass, AArch64::LD1D_4Z,
 1712 AArch64::LD1D_4Z_STRIDED);
 1713 case AArch64::LDNT1B_4Z_PSEUDO:
 1714 return expandMultiVecPseudo(
 1715 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
 1716 AArch64::LDNT1B_4Z, AArch64::LDNT1B_4Z_STRIDED);
 1717 case AArch64::LDNT1H_4Z_PSEUDO:
 1718 return expandMultiVecPseudo(
 1719 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
 1720 AArch64::LDNT1H_4Z, AArch64::LDNT1H_4Z_STRIDED);
 1721 case AArch64::LDNT1W_4Z_PSEUDO:
 1722 return expandMultiVecPseudo(
 1723 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
 1724 AArch64::LDNT1W_4Z, AArch64::LDNT1W_4Z_STRIDED);
 1725 case AArch64::LDNT1D_4Z_PSEUDO:
 1726 return expandMultiVecPseudo(
 1727 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
 1728 AArch64::LDNT1D_4Z, AArch64::LDNT1D_4Z_STRIDED);
 1729 }
 // Not a pseudo this pass knows how to expand.
 1730 return false;
 1731}
1732
1733/// Iterate over the instructions in basic block MBB and expand any
1734/// pseudo instructions. Return true if anything was modified.
1735bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
 1736 bool Modified = false;
 1737
 // NOTE(review): doxygen extraction — original line 1738 (which initializes
 // the MBBI/E iterators, presumably to MBB.begin()/MBB.end()) is missing
 // from this view.
 1739 while (MBBI != E) {
 // Capture the next iterator before expanding: expandMI may erase or
 // replace the current instruction (and may update NMBBI itself, e.g.
 // when it splits the block).
 1740 MachineBasicBlock::iterator NMBBI = std::next(MBBI);
 1741 Modified |= expandMI(MBB, MBBI, NMBBI);
 1742 MBBI = NMBBI;
 1743 }
 1744
 1745 return Modified;
 1746}
1747
1748bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
1749 TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
1750
1751 bool Modified = false;
1752 for (auto &MBB : MF)
1753 Modified |= expandMBB(MBB);
1754 return Modified;
1755}
1756
1757/// Returns an instance of the pseudo instruction expansion pass.
 // NOTE(review): doxygen extraction — original line 1758 (the function
 // signature, presumably `FunctionPass *llvm::createAArch64ExpandPseudoPass() {`)
 // is missing from this view; only the body remains.
 1759 return new AArch64ExpandPseudo();
 1760}
#define AARCH64_EXPAND_PSEUDO_NAME
MachineInstrBuilder & UseMI
static MachineInstr * createCallWithOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const AArch64InstrInfo *TII, unsigned Opcode, ArrayRef< MachineOperand > ExplicitOps, unsigned RegMaskStartIdx)
static MachineInstr * createCall(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const AArch64InstrInfo *TII, MachineOperand &CallTarget, unsigned RegMaskStartIdx)
MachineInstrBuilder MachineInstrBuilder & DefMI
SmallVector< AArch64_IMM::ImmInsnModel, 4 > Insn
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
uint64_t Size
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define I(x, y, z)
Definition: MD5.cpp:58
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
unsigned const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
AArch64FunctionInfo - This class is derived from MachineFunctionInfo and contains private AArch64-spe...
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
The address of a basic block.
Definition: Constants.h:890
A debug info location.
Definition: DebugLoc.h:33
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
A set of physical registers with utility functions to track liveness when walking backward/forward th...
Definition: LivePhysRegs.h:52
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
Set of metadata that should be preserved when using BuildMI().
instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineInstr * CreateMachineInstr(const MCInstrDesc &MCID, DebugLoc DL, bool NoImplicit=false)
CreateMachineInstr - Allocate a new MachineInstr.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
void moveCallSiteInfo(const MachineInstr *Old, const MachineInstr *New)
Move the call site info from Old to New.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & addUse(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
const MachineInstrBuilder & addDef(Register RegNo, unsigned Flags=0, unsigned SubReg=0) const
Add a virtual register definition operand.
Representation of each machine instruction.
Definition: MachineInstr.h:69
void setDebugInstrNum(unsigned Num)
Set instruction number of this MachineInstr.
Definition: MachineInstr.h:549
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isCPI() const
isCPI - Tests if this is a MO_ConstantPoolIndex operand.
bool isSymbol() const
isSymbol - Tests if this is a MO_ExternalSymbol operand.
unsigned getTargetFlags() const
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
const char * getSymbolName() const
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition: Register.h:110
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
CodeModel::Model getCodeModel() const
Returns the code model.
ArrayRef< MCPhysReg > getRegisters() const
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
virtual const TargetRegisterInfo * getRegisterInfo() const
getRegisterInfo - If register information is available, return it.
virtual const TargetInstrInfo * getInstrInfo() const
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
int getSVERevInstr(uint16_t Opcode)
int getSVEPseudoMap(uint16_t Opcode)
int getSVENonRevInstr(uint16_t Opcode)
Key
PAL metadata keys.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Renamable
Register that may be renamed.
@ Define
Register definition.
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:480
void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition: APFloat.h:1446
unsigned getDeadRegState(bool B)
void initializeAArch64ExpandPseudoPass(PassRegistry &)
FunctionPass * createAArch64ExpandPseudoPass()
Returns an instance of the pseudo instruction expansion pass.
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag=MachineInstr::NoFlags, bool SetNZCV=false, bool NeedsWinCFI=false, bool *HasWinCFI=nullptr, bool EmitCFAOffset=false, StackOffset InitialOffset={}, unsigned FrameReg=AArch64::SP)
emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg plus Offset.
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
unsigned getKillRegState(bool B)
unsigned getRenamableRegState(bool B)
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition: STLExtras.h:1886
void computeAndAddLiveIns(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB)
Convenience function combining computeLiveIns() and addLiveIns().
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Description of the encoding of one expression Op.