LLVM 23.0.0git
AArch64ExpandPseudoInsts.cpp
Go to the documentation of this file.
1//===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass that expands pseudo instructions into target
10// instructions to allow proper scheduling and other late optimizations. This
11// pass should be run after register allocation but before the post-regalloc
12// scheduling pass.
13//
14//===----------------------------------------------------------------------===//
15
16#include "AArch64ExpandImm.h"
17#include "AArch64InstrInfo.h"
19#include "AArch64Subtarget.h"
32#include "llvm/IR/DebugLoc.h"
33#include "llvm/MC/MCInstrDesc.h"
34#include "llvm/Pass.h"
38#include <cassert>
39#include <cstdint>
40#include <iterator>
41
42using namespace llvm;
43
44#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"
45
46namespace {
47
// Worker class performing the pseudo-instruction expansion for one
// MachineFunction. run() is the entry point; each private expand* helper
// handles one family of pseudos (move-immediate, compare-and-swap,
// SVE destructive ops, MTE set-tag loops, marker calls, ...).
// NOTE(review): doxygen-extraction artifact — every line below carries its
// original source line number fused to the front, and several declaration
// lines (parameter lists, MachineBasicBlock::iterator MBBI parameters) are
// missing wherever that numbering skips. Comments only; code untouched.
48class AArch64ExpandPseudoImpl {
49public:
// Cached instruction info for the current subtarget; set up by run().
50 const AArch64InstrInfo *TII;
51
52 bool run(MachineFunction &MF);
53
54private:
55 bool expandMBB(MachineBasicBlock &MBB);
58 bool expandMultiVecPseudo(MachineBasicBlock &MBB,
60 TargetRegisterClass ContiguousClass,
61 TargetRegisterClass StridedClass,
62 unsigned ContiguousOpc, unsigned StridedOpc);
63 bool expandFormTuplePseudo(MachineBasicBlock &MBB,
66 unsigned Size);
68 unsigned BitSize);
69
70 bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
72 bool expandSVEBitwisePseudo(MachineInstr &MI, MachineBasicBlock &MBB,
75 unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
76 unsigned ExtendImm, unsigned ZeroReg,
78 bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
81 bool expandSetTagLoop(MachineBasicBlock &MBB,
84 bool expandSVESpillFill(MachineBasicBlock &MBB,
86 unsigned N);
87 bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
90 bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
92 bool expandSTSHHAtomicStore(MachineBasicBlock &MBB,
// Pair of blocks produced when a pseudo is split into a conditionally
// executed block (CondBB) and the fall-through continuation (EndBB).
94 struct ConditionalBlocks {
95 MachineBasicBlock &CondBB;
96 MachineBasicBlock &EndBB;
97 };
98 ConditionalBlocks expandConditionalPseudo(MachineBasicBlock &MBB,
100 DebugLoc DL,
101 MachineInstrBuilder &Branch);
102 MachineBasicBlock *expandRestoreZASave(MachineBasicBlock &MBB,
104 MachineBasicBlock *expandCommitZASave(MachineBasicBlock &MBB,
106 MachineBasicBlock *expandCondSMToggle(MachineBasicBlock &MBB,
108};
109
// Legacy pass-manager wrapper for the expansion. The actual work happens in
// runOnMachineFunction (defined elsewhere in this file), which presumably
// delegates to AArch64ExpandPseudoImpl::run — body not visible here, confirm.
110class AArch64ExpandPseudoLegacy : public MachineFunctionPass {
111public:
// Pass identification; address is used as a unique ID by the pass manager.
112 static char ID;
113
114 AArch64ExpandPseudoLegacy() : MachineFunctionPass(ID) {}
115
116 bool runOnMachineFunction(MachineFunction &MF) override;
117
118 StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }
119};
120
121} // end anonymous namespace
122
// Pass ID storage and registration with the legacy pass manager under the
// command-line name "aarch64-expand-pseudo".
123char AArch64ExpandPseudoLegacy::ID = 0;
124
125INITIALIZE_PASS(AArch64ExpandPseudoLegacy, "aarch64-expand-pseudo",
126 AARCH64_EXPAND_PSEUDO_NAME, false, false)
127
128/// Transfer implicit operands on the pseudo instruction to the
129/// instructions created from the expansion.
// NOTE(review): original line 131 (the trailing `MachineInstrBuilder &DefMI)
// {` parameter line) was dropped by the doxygen extraction — `DefMI` below is
// that missing parameter. Comments only; code untouched.
130static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
// Operands beyond the MCInstrDesc's declared operand count are the implicit
// ones attached to the pseudo; copy uses onto UseMI and defs onto DefMI.
132 const MCInstrDesc &Desc = OldMI.getDesc();
133 for (const MachineOperand &MO :
134 llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) {
135 assert(MO.isReg() && MO.getReg());
136 if (MO.isUse())
137 UseMI.add(MO);
138 else
139 DefMI.add(MO);
140 }
141}
142
143/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
144/// real move-immediate instructions to synthesize the immediate.
// Expand MOVi32imm/MOVi64imm: synthesize the immediate with the sequence
// computed by AArch64_IMM::expandMOVImm (logical-immediate ORR/AND/EOR,
// MOVZ/MOVN/MOVK chunks, or shifted-register forms). Only the final
// instruction of the sequence may carry the pseudo's dead-def flag; implicit
// operands are transferred onto the first/last expanded instructions.
// NOTE(review): doxygen extraction dropped original line 146 (the
// MachineBasicBlock::iterator MBBI parameter) and line 161 (the SmallVector
// `Insn` declaration) — the numbering gaps below mark them.
145bool AArch64ExpandPseudoImpl::expandMOVImm(MachineBasicBlock &MBB,
147 unsigned BitSize) {
148 MachineInstr &MI = *MBBI;
149 Register DstReg = MI.getOperand(0).getReg();
150 RegState RenamableState =
151 getRenamableRegState(MI.getOperand(0).isRenamable());
152 uint64_t Imm = MI.getOperand(1).getImm();
153
// A move into WZR/XZR has no visible effect; delete the pseudo instead of
// emitting an ORR whose zero-register encoding would actually write SP.
154 if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
155 // Useless def, and we don't want to risk creating an invalid ORR (which
156 // would really write to sp).
157 MI.eraseFromParent();
158 return true;
159 }
160
162 AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
163 assert(Insn.size() != 0);
164
165 SmallVector<MachineInstrBuilder, 4> MIBS;
166 for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
// Only the last instruction in the materialization may be marked dead.
167 bool LastItem = std::next(I) == E;
168 switch (I->Opcode)
169 {
170 default: llvm_unreachable("unhandled!"); break;
171
172 case AArch64::ORRWri:
173 case AArch64::ORRXri:
// Op1 == 0 selects the zero register as source (plain move-immediate);
// otherwise the instruction reads and redefines DstReg.
174 if (I->Op1 == 0) {
175 MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
176 .add(MI.getOperand(0))
177 .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
178 .addImm(I->Op2));
179 } else {
180 Register DstReg = MI.getOperand(0).getReg();
181 bool DstIsDead = MI.getOperand(0).isDead();
182 MIBS.push_back(
183 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
184 .addReg(DstReg, RegState::Define |
185 getDeadRegState(DstIsDead && LastItem) |
186 RenamableState)
187 .addReg(DstReg)
188 .addImm(I->Op2));
189 }
190 break;
// Shifted-register forms: DstReg is the def and both register sources.
191 case AArch64::EONXrs:
192 case AArch64::EORXrs:
193 case AArch64::ORRWrs:
194 case AArch64::ORRXrs: {
195 Register DstReg = MI.getOperand(0).getReg();
196 bool DstIsDead = MI.getOperand(0).isDead();
197 MIBS.push_back(
198 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
199 .addReg(DstReg, RegState::Define |
200 getDeadRegState(DstIsDead && LastItem) |
201 RenamableState)
202 .addReg(DstReg)
203 .addReg(DstReg)
204 .addImm(I->Op2));
205 } break;
206 case AArch64::ANDXri:
207 case AArch64::EORXri:
208 if (I->Op1 == 0) {
209 MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
210 .add(MI.getOperand(0))
211 .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
212 .addImm(I->Op2));
213 } else {
214 Register DstReg = MI.getOperand(0).getReg();
215 bool DstIsDead = MI.getOperand(0).isDead();
216 MIBS.push_back(
217 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
218 .addReg(DstReg, RegState::Define |
219 getDeadRegState(DstIsDead && LastItem) |
220 RenamableState)
221 .addReg(DstReg)
222 .addImm(I->Op2));
223 }
224 break;
// MOVZ/MOVN start a sequence: Op1 is the 16-bit chunk, Op2 the shift.
225 case AArch64::MOVNWi:
226 case AArch64::MOVNXi:
227 case AArch64::MOVZWi:
228 case AArch64::MOVZXi: {
229 bool DstIsDead = MI.getOperand(0).isDead();
230 MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
231 .addReg(DstReg, RegState::Define |
232 getDeadRegState(DstIsDead && LastItem) |
233 RenamableState)
234 .addImm(I->Op1)
235 .addImm(I->Op2));
236 } break;
// MOVK patches one 16-bit chunk in place, so it also reads DstReg.
237 case AArch64::MOVKWi:
238 case AArch64::MOVKXi: {
239 Register DstReg = MI.getOperand(0).getReg();
240 bool DstIsDead = MI.getOperand(0).isDead();
241 MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
242 .addReg(DstReg,
243 RegState::Define |
244 getDeadRegState(DstIsDead && LastItem) |
245 RenamableState)
246 .addReg(DstReg)
247 .addImm(I->Op1)
248 .addImm(I->Op2));
249 } break;
250 }
251 }
// Implicit uses go onto the first expanded instruction, implicit defs onto
// the last; then the pseudo is removed.
252 transferImpOps(MI, MIBS.front(), MIBS.back());
253 MI.eraseFromParent();
254 return true;
255}
256
// Expand a CMP_SWAP pseudo into an explicit load-exclusive/store-exclusive
// loop across three new blocks (LoadCmp -> Store -> Done), with the
// parameterized load-acquire (LdarOp), store-release (StlrOp), compare
// (CmpOp/ExtendImm/ZeroReg) opcodes supplied by the per-width callers.
// Live-in lists of the new blocks are recomputed at the end.
// NOTE(review): doxygen extraction dropped original line 297 — the condition
// immediate of the Bcc builder below (numbering jumps 296 -> 298).
257bool AArch64ExpandPseudoImpl::expandCMP_SWAP(
258 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
259 unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
260 MachineBasicBlock::iterator &NextMBBI) {
261 MachineInstr &MI = *MBBI;
262 MIMetadata MIMD(MI);
263 const MachineOperand &Dest = MI.getOperand(0);
264 Register StatusReg = MI.getOperand(1).getReg();
265 bool StatusDead = MI.getOperand(1).isDead();
266 // Duplicating undef operands into 2 instructions does not guarantee the same
267 // value on both; However undef should be replaced by xzr anyway.
268 assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
269 Register AddrReg = MI.getOperand(2).getReg();
270 Register DesiredReg = MI.getOperand(3).getReg();
271 Register NewReg = MI.getOperand(4).getReg();
272
273 MachineFunction *MF = MBB.getParent();
274 auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
275 auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
276 auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
277
278 MF->insert(++MBB.getIterator(), LoadCmpBB);
279 MF->insert(++LoadCmpBB->getIterator(), StoreBB);
280 MF->insert(++StoreBB->getIterator(), DoneBB);
281
282 // .Lloadcmp:
283 // mov wStatus, 0
284 // ldaxr xDest, [xAddr]
285 // cmp xDest, xDesired
286 // b.ne .Ldone
// Pre-clear the status register on the failure-exit path, but only if a
// later user exists (skip when the operand is already dead).
287 if (!StatusDead)
288 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::MOVZWi), StatusReg)
289 .addImm(0).addImm(0);
290 BuildMI(LoadCmpBB, MIMD, TII->get(LdarOp), Dest.getReg())
291 .addReg(AddrReg);
292 BuildMI(LoadCmpBB, MIMD, TII->get(CmpOp), ZeroReg)
293 .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
294 .addReg(DesiredReg)
295 .addImm(ExtendImm);
296 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::Bcc))
298 .addMBB(DoneBB)
299 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
300 LoadCmpBB->addSuccessor(DoneBB);
301 LoadCmpBB->addSuccessor(StoreBB);
302
303 // .Lstore:
304 // stlxr wStatus, xNew, [xAddr]
305 // cbnz wStatus, .Lloadcmp
306 BuildMI(StoreBB, MIMD, TII->get(StlrOp), StatusReg)
307 .addReg(NewReg)
308 .addReg(AddrReg);
309 BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
310 .addReg(StatusReg, getKillRegState(StatusDead))
311 .addMBB(LoadCmpBB);
312 StoreBB->addSuccessor(LoadCmpBB);
313 StoreBB->addSuccessor(DoneBB);
314
// Everything after the pseudo moves to DoneBB, which inherits MBB's
// successors; MBB now falls through only into the loop.
315 DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
316 DoneBB->transferSuccessors(&MBB);
317
318 MBB.addSuccessor(LoadCmpBB);
319
320 NextMBBI = MBB.end();
321 MI.eraseFromParent();
322
323 // Recompute livein lists.
324 LivePhysRegs LiveRegs;
325 computeAndAddLiveIns(LiveRegs, *DoneBB);
326 computeAndAddLiveIns(LiveRegs, *StoreBB);
327 computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
328 // Do an extra pass around the loop to get loop carried registers right.
329 StoreBB->clearLiveIns();
330 computeAndAddLiveIns(LiveRegs, *StoreBB);
331 LoadCmpBB->clearLiveIns();
332 computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
333
334 return true;
335}
336
// Expand the 128-bit CMP_SWAP pseudos into an LDXP/STXP exclusive loop over
// four new blocks (LoadCmp -> Store / Fail -> Done). The pseudo's opcode
// selects acquire/release semantics for the exclusive pair instructions.
// On a comparison mismatch the Fail block stores back the loaded value with
// the same (possibly releasing) store-exclusive to keep the exclusive
// monitor protocol consistent before exiting.
// NOTE(review): doxygen extraction dropped original lines 405 and 413 — the
// trailing condition-code operand of each CSINCWr builder below (numbering
// jumps 404 -> 406 and 412 -> 414).
337bool AArch64ExpandPseudoImpl::expandCMP_SWAP_128(
338 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
339 MachineBasicBlock::iterator &NextMBBI) {
340 MachineInstr &MI = *MBBI;
341 MIMetadata MIMD(MI);
342 MachineOperand &DestLo = MI.getOperand(0);
343 MachineOperand &DestHi = MI.getOperand(1);
344 Register StatusReg = MI.getOperand(2).getReg();
345 bool StatusDead = MI.getOperand(2).isDead();
346 // Duplicating undef operands into 2 instructions does not guarantee the same
347 // value on both; However undef should be replaced by xzr anyway.
348 assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
349 Register AddrReg = MI.getOperand(3).getReg();
350 Register DesiredLoReg = MI.getOperand(4).getReg();
351 Register DesiredHiReg = MI.getOperand(5).getReg();
352 Register NewLoReg = MI.getOperand(6).getReg();
353 Register NewHiReg = MI.getOperand(7).getReg();
354
355 unsigned LdxpOp, StxpOp;
356
// Memory-ordering variant is encoded in the pseudo opcode: pick plain or
// acquire load-exclusive and plain or release store-exclusive accordingly.
357 switch (MI.getOpcode()) {
358 case AArch64::CMP_SWAP_128_MONOTONIC:
359 LdxpOp = AArch64::LDXPX;
360 StxpOp = AArch64::STXPX;
361 break;
362 case AArch64::CMP_SWAP_128_RELEASE:
363 LdxpOp = AArch64::LDXPX;
364 StxpOp = AArch64::STLXPX;
365 break;
366 case AArch64::CMP_SWAP_128_ACQUIRE:
367 LdxpOp = AArch64::LDAXPX;
368 StxpOp = AArch64::STXPX;
369 break;
370 case AArch64::CMP_SWAP_128:
371 LdxpOp = AArch64::LDAXPX;
372 StxpOp = AArch64::STLXPX;
373 break;
374 default:
375 llvm_unreachable("Unexpected opcode");
376 }
377
378 MachineFunction *MF = MBB.getParent();
379 auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
380 auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
381 auto FailBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
382 auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
383
384 MF->insert(++MBB.getIterator(), LoadCmpBB);
385 MF->insert(++LoadCmpBB->getIterator(), StoreBB);
386 MF->insert(++StoreBB->getIterator(), FailBB);
387 MF->insert(++FailBB->getIterator(), DoneBB);
388
389 // .Lloadcmp:
390 // ldaxp xDestLo, xDestHi, [xAddr]
391 // cmp xDestLo, xDesiredLo
392 // sbcs xDestHi, xDesiredHi
393 // b.ne .Ldone
394 BuildMI(LoadCmpBB, MIMD, TII->get(LdxpOp))
395 .addReg(DestLo.getReg(), RegState::Define)
396 .addReg(DestHi.getReg(), RegState::Define)
397 .addReg(AddrReg);
// Compare low halves, materialize the result into StatusReg via CSINC,
// then combine with the high-half comparison the same way.
398 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
399 .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
400 .addReg(DesiredLoReg)
401 .addImm(0);
402 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
403 .addUse(AArch64::WZR)
404 .addUse(AArch64::WZR)
406 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
407 .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
408 .addReg(DesiredHiReg)
409 .addImm(0);
410 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
411 .addUse(StatusReg, RegState::Kill)
412 .addUse(StatusReg, RegState::Kill)
414 BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CBNZW))
415 .addUse(StatusReg, getKillRegState(StatusDead))
416 .addMBB(FailBB);
417 LoadCmpBB->addSuccessor(FailBB);
418 LoadCmpBB->addSuccessor(StoreBB);
419
420 // .Lstore:
421 // stlxp wStatus, xNewLo, xNewHi, [xAddr]
422 // cbnz wStatus, .Lloadcmp
423 BuildMI(StoreBB, MIMD, TII->get(StxpOp), StatusReg)
424 .addReg(NewLoReg)
425 .addReg(NewHiReg)
426 .addReg(AddrReg);
427 BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
428 .addReg(StatusReg, getKillRegState(StatusDead))
429 .addMBB(LoadCmpBB);
430 BuildMI(StoreBB, MIMD, TII->get(AArch64::B)).addMBB(DoneBB);
431 StoreBB->addSuccessor(LoadCmpBB);
432 StoreBB->addSuccessor(DoneBB);
433
434 // .Lfail:
435 // stlxp wStatus, xDestLo, xDestHi, [xAddr]
436 // cbnz wStatus, .Lloadcmp
437 BuildMI(FailBB, MIMD, TII->get(StxpOp), StatusReg)
438 .addReg(DestLo.getReg())
439 .addReg(DestHi.getReg())
440 .addReg(AddrReg);
441 BuildMI(FailBB, MIMD, TII->get(AArch64::CBNZW))
442 .addReg(StatusReg, getKillRegState(StatusDead))
443 .addMBB(LoadCmpBB);
444 FailBB->addSuccessor(LoadCmpBB);
445 FailBB->addSuccessor(DoneBB);
446
// Move the tail of the original block after the pseudo into DoneBB and
// hand over MBB's successor list.
447 DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
448 DoneBB->transferSuccessors(&MBB);
449
450 MBB.addSuccessor(LoadCmpBB);
451
452 NextMBBI = MBB.end();
453 MI.eraseFromParent();
454
455 // Recompute liveness bottom up.
456 LivePhysRegs LiveRegs;
457 computeAndAddLiveIns(LiveRegs, *DoneBB);
458 computeAndAddLiveIns(LiveRegs, *FailBB);
459 computeAndAddLiveIns(LiveRegs, *StoreBB);
460 computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
461
462 // Do an extra pass in the loop to get the loop carried dependencies right.
463 FailBB->clearLiveIns();
464 computeAndAddLiveIns(LiveRegs, *FailBB);
465 StoreBB->clearLiveIns();
466 computeAndAddLiveIns(LiveRegs, *StoreBB);
467 LoadCmpBB->clearLiveIns();
468 computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
469
470 return true;
471}
472
473/// \brief Expand Pseudos to Instructions with destructive operands.
474///
475/// This mechanism uses MOVPRFX instructions for zeroing the false lanes
476/// or for fixing relaxed register allocation conditions to comply with
477/// the instructions register constraints. The latter case may be cheaper
478/// than setting the register constraints in the register allocator,
479/// since that will insert regular MOV instructions rather than MOVPRFX.
480///
481/// Example (after register allocation):
482///
483/// FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
484///
485/// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
486/// * We cannot map directly to FSUB_ZPmZ_B because the register
487/// constraints of the instruction are not met.
488/// * Also the _ZERO specifies the false lanes need to be zeroed.
489///
490/// We first try to see if the destructive operand == result operand,
491/// if not, we try to swap the operands, e.g.
492///
493/// FSUB_ZPmZ_B Z0, Pg/m, Z0, Z1
494///
495/// But because FSUB_ZPmZ is not commutative, this is semantically
496/// different, so we need a reverse instruction:
497///
498/// FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1
499///
500/// Then we implement the zeroing of the false lanes of Z0 by adding
501/// a zeroing MOVPRFX instruction:
502///
503/// MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
504/// FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1
505///
506/// Note that this can only be done for _ZERO or _UNDEF variants where
507/// we can guarantee the false lanes to be zeroed (by implementing this)
508/// or that they are undef (don't care / not used), otherwise the
509/// swapping of operands is illegal because the operation is not
510/// (or cannot be emulated to be) fully commutative.
// See the block comment above for the overall MOVPRFX strategy.
// NOTE(review): doxygen extraction dropped many lines inside this function —
// the MBBI parameter (line 513), every `case AArch64::Destructive*:` label in
// the three switches on DType (lines 524-525, 533-534, 537, 540, 552, 566,
// 569-570, 575-577, 580, 685, 690-693, 698, 704), the ElementSize case
// labels (603-604, 609, 614, 619), and assert/condition continuation lines
// (639-640, 659-660, 713). The numbering gaps mark them. Comments only.
511bool AArch64ExpandPseudoImpl::expand_DestructiveOp(
512 MachineInstr &MI, MachineBasicBlock &MBB,
// Map the pseudo to its real SVE opcode and read its destructive-type and
// false-lanes policy from the target-specific flags.
514 unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
515 uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
516 uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
517 bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
518 Register DstReg = MI.getOperand(0).getReg();
519 bool DstIsDead = MI.getOperand(0).isDead();
520 bool UseRev = false;
521 unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;
522
// Select which operands play the predicate / destructive / source roles,
// possibly swapping operands and switching to the reversed instruction.
523 switch (DType) {
526 if (DstReg == MI.getOperand(3).getReg()) {
527 // FSUB Zd, Pg, Zs1, Zd ==> FSUBR Zd, Pg/m, Zd, Zs1
528 std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
529 UseRev = true;
530 break;
531 }
532 [[fallthrough]];
535 std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
536 break;
538 std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 3, 3);
539 break;
541 std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
542 if (DstReg == MI.getOperand(3).getReg()) {
543 // FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za
544 std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 3, 4, 2);
545 UseRev = true;
546 } else if (DstReg == MI.getOperand(4).getReg()) {
547 // FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za
548 std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 4, 3, 2);
549 UseRev = true;
550 }
551 break;
553 // EXT_ZZI_CONSTRUCTIVE Zd, Zs, Imm
554 // ==> MOVPRFX Zd Zs; EXT_ZZI Zd, Zd, Zs, Imm
555 std::tie(DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 1, 2);
556 break;
557 default:
558 llvm_unreachable("Unsupported Destructive Operand type");
559 }
560
561 // MOVPRFX can only be used if the destination operand
562 // is the destructive operand, not as any other operand,
563 // so the Destructive Operand must be unique.
564 bool DOPRegIsUnique = false;
565 switch (DType) {
567 DOPRegIsUnique = DstReg != MI.getOperand(SrcIdx).getReg();
568 break;
571 DOPRegIsUnique =
572 DstReg != MI.getOperand(DOPIdx).getReg() ||
573 MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
574 break;
578 DOPRegIsUnique = true;
579 break;
581 DOPRegIsUnique =
582 DstReg != MI.getOperand(DOPIdx).getReg() ||
583 (MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg() &&
584 MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg());
585 break;
586 }
587
588 // Resolve the reverse opcode
589 if (UseRev) {
590 int NewOpcode;
591 // e.g. DIV -> DIVR
592 if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
593 Opcode = NewOpcode;
594 // e.g. DIVR -> DIV
595 else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
596 Opcode = NewOpcode;
597 }
598
599 // Get the right MOVPRFX
600 uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
601 unsigned MovPrfx, LSLZero, MovPrfxZero;
// Per-element-size selection: the unpredicated MOVPRFX is size-agnostic,
// but the zeroing MOVPRFX and the lane-zeroing LSL are size-specific.
602 switch (ElementSize) {
605 MovPrfx = AArch64::MOVPRFX_ZZ;
606 LSLZero = AArch64::LSL_ZPmI_B;
607 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
608 break;
610 MovPrfx = AArch64::MOVPRFX_ZZ;
611 LSLZero = AArch64::LSL_ZPmI_H;
612 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
613 break;
615 MovPrfx = AArch64::MOVPRFX_ZZ;
616 LSLZero = AArch64::LSL_ZPmI_S;
617 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
618 break;
620 MovPrfx = AArch64::MOVPRFX_ZZ;
621 LSLZero = AArch64::LSL_ZPmI_D;
622 MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
623 break;
624 default:
625 llvm_unreachable("Unsupported ElementSize");
626 }
627
628 // Preserve undef state until DOP's reg is defined.
629 RegState DOPRegState = getUndefRegState(MI.getOperand(DOPIdx).isUndef());
630
631 //
632 // Create the destructive operation (if required)
633 //
634 MachineInstrBuilder PRFX, DOP;
635 if (FalseZero) {
636 // If we cannot prefix the requested instruction we'll instead emit a
637 // prefixed_zeroing_mov for DestructiveBinary.
638 assert((DOPRegIsUnique || DType == AArch64::DestructiveBinary ||
641 "The destructive operand should be unique");
642 assert(ElementSize != AArch64::ElementSizeNone &&
643 "This instruction is unpredicated");
644
645 // Merge source operand into destination register
646 PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
647 .addReg(DstReg, RegState::Define)
648 .addReg(MI.getOperand(PredIdx).getReg())
649 .addReg(MI.getOperand(DOPIdx).getReg(), DOPRegState);
650
651 // After the movprfx, the destructive operand is same as Dst
652 DOPIdx = 0;
653 DOPRegState = {};
654
655 // Create the additional LSL to zero the lanes when the DstReg is not
656 // unique. Zeros the lanes in z0 that aren't active in p0 with sequence
657 // movprfx z0.b, p0/z, z0.b; lsl z0.b, p0/m, z0.b, #0;
658 if ((DType == AArch64::DestructiveBinary ||
661 !DOPRegIsUnique) {
662 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LSLZero))
663 .addReg(DstReg, RegState::Define)
664 .add(MI.getOperand(PredIdx))
665 .addReg(DstReg)
666 .addImm(0);
667 }
668 } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
669 assert(DOPRegIsUnique && "The destructive operand should be unique");
670 PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
671 .addReg(DstReg, RegState::Define)
672 .addReg(MI.getOperand(DOPIdx).getReg(), DOPRegState);
673 DOPIdx = 0;
674 DOPRegState = {};
675 }
676
677 //
678 // Create the destructive operation
679 //
680 DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
681 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));
// The destructive operand is consumed by the real instruction.
682 DOPRegState = DOPRegState | RegState::Kill;
683
// Operand order of the real instruction differs per destructive type;
// the dropped case labels selected between these layouts.
684 switch (DType) {
686 DOP.addReg(MI.getOperand(DOPIdx).getReg(), DOPRegState)
687 .add(MI.getOperand(PredIdx))
688 .add(MI.getOperand(SrcIdx));
689 break;
694 DOP.add(MI.getOperand(PredIdx))
695 .addReg(MI.getOperand(DOPIdx).getReg(), DOPRegState)
696 .add(MI.getOperand(SrcIdx));
697 break;
699 DOP.add(MI.getOperand(PredIdx))
700 .addReg(MI.getOperand(DOPIdx).getReg(), DOPRegState)
701 .add(MI.getOperand(SrcIdx))
702 .add(MI.getOperand(Src2Idx));
703 break;
705 DOP.addReg(MI.getOperand(DOPIdx).getReg(), DOPRegState)
706 .add(MI.getOperand(SrcIdx))
707 .add(MI.getOperand(Src2Idx));
708 break;
709 }
710
// Implicit uses attach to the first emitted instruction (the MOVPRFX when
// present), implicit defs to the destructive operation itself.
711 if (PRFX) {
712 transferImpOps(MI, PRFX, DOP);
714 } else
715 transferImpOps(MI, DOP, DOP);
716
717 MI.eraseFromParent();
718 return true;
719}
720
// Expand the EON/NAND/NOR SVE bitwise pseudos into the SVE2 ternary bitwise
// instructions BSL2N/NBSL, commuting the sources or emitting a MOVPRFX when
// the destination does not already alias the first source.
// NOTE(review): doxygen extraction dropped original lines 723 (the MBBI
// parameter), 738/740 (leading operand/flag lines of the MOVPRFX builder),
// and 779-780 (around the transferImpOps call). Comments only.
721bool AArch64ExpandPseudoImpl::expandSVEBitwisePseudo(
722 MachineInstr &MI, MachineBasicBlock &MBB,
724 MachineInstrBuilder PRFX, DOP;
725 const unsigned Opcode = MI.getOpcode();
726 const MachineOperand &Op0 = MI.getOperand(0);
727 const MachineOperand *Op1 = &MI.getOperand(1);
728 const MachineOperand *Op2 = &MI.getOperand(2);
729 const Register DOPReg = Op0.getReg();
730
731 if (DOPReg == Op2->getReg()) {
732 // Commute the operands to allow destroying the second source.
733 std::swap(Op1, Op2);
734 } else if (DOPReg != Op1->getReg()) {
735 // If not in destructive form, emit a MOVPRFX. The input should only be
736 // killed if unused by the subsequent instruction.
737 PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVPRFX_ZZ))
739 .addReg(Op1->getReg(),
741 getUndefRegState(Op1->isUndef()) |
742 getKillRegState(Op1->isKill() &&
743 Opcode == AArch64::NAND_ZZZ));
744 }
745
746 assert((DOPReg == Op1->getReg() || PRFX) && "invalid expansion");
747
// Flags for the destructive first-source operand of the real instruction;
// it is always consumed (Kill) by the BSL2N/NBSL below.
748 const RegState DOPRegState = getRenamableRegState(Op0.isRenamable()) |
749 getUndefRegState(!PRFX && Op1->isUndef()) |
750 RegState::Kill;
751
752 switch (Opcode) {
753 default:
754 llvm_unreachable("unhandled opcode");
755 case AArch64::EON_ZZZ:
756 DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BSL2N_ZZZZ))
757 .add(Op0)
758 .addReg(DOPReg, DOPRegState)
759 .add(*Op1)
760 .add(*Op2);
761 break;
762 case AArch64::NAND_ZZZ:
763 DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::NBSL_ZZZZ))
764 .add(Op0)
765 .addReg(DOPReg, DOPRegState)
766 .add(*Op2)
767 .add(*Op2);
768 break;
769 case AArch64::NOR_ZZZ:
770 DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::NBSL_ZZZZ))
771 .add(Op0)
772 .addReg(DOPReg, DOPRegState)
773 .add(*Op2)
774 .add(*Op1);
775 break;
776 }
777
// Implicit uses go on the first emitted instruction, implicit defs on the
// real operation, then the pseudo is removed.
778 if (PRFX) {
779 transferImpOps(MI, PRFX, DOP);
781 } else {
782 transferImpOps(MI, DOP, DOP);
783 }
784
785 MI.eraseFromParent();
786 return true;
787}
788
// Expand STGloop_wback/STZGloop_wback: tag (and optionally zero) a region of
// `Size` bytes at AddressReg using post-indexed ST(Z)G / ST(Z)2G, emitting a
// single 16-byte store for an odd granule count and then a countdown loop of
// 32-byte pair stores driven by SizeReg.
// NOTE(review): doxygen extraction dropped original lines 814 (the
// declaration of iterator `I` used by expandMOVImm below), 830 (an operand
// line of the ST2G builder), and 838 (the Bcc condition immediate).
789bool AArch64ExpandPseudoImpl::expandSetTagLoop(
790 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
791 MachineBasicBlock::iterator &NextMBBI) {
792 MachineInstr &MI = *MBBI;
793 DebugLoc DL = MI.getDebugLoc();
794 Register SizeReg = MI.getOperand(0).getReg();
795 Register AddressReg = MI.getOperand(1).getReg();
796
797 MachineFunction *MF = MBB.getParent();
798
// STZG variants zero the memory as well as setting the allocation tags.
799 bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
800 const unsigned OpCode1 =
801 ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
802 const unsigned OpCode2 =
803 ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;
804
805 unsigned Size = MI.getOperand(2).getImm();
806 assert(Size > 0 && Size % 16 == 0);
// Peel one single-granule store so the remaining size is a multiple of 32.
807 if (Size % (16 * 2) != 0) {
808 BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
809 .addReg(AddressReg)
810 .addReg(AddressReg)
811 .addImm(1);
812 Size -= 16;
813 }
815 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
816 .addImm(Size);
817 expandMOVImm(MBB, I, 64);
818
819 auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
820 auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
821
822 MF->insert(++MBB.getIterator(), LoopBB);
823 MF->insert(++LoopBB->getIterator(), DoneBB);
824
// Loop body: store a 32-byte tagged pair with write-back, subtract 32 from
// the remaining size, and branch back while size is non-zero.
825 BuildMI(LoopBB, DL, TII->get(OpCode2))
826 .addDef(AddressReg)
827 .addReg(AddressReg)
828 .addReg(AddressReg)
829 .addImm(2)
831 .setMIFlags(MI.getFlags());
832 BuildMI(LoopBB, DL, TII->get(AArch64::SUBSXri))
833 .addDef(SizeReg)
834 .addReg(SizeReg)
835 .addImm(16 * 2)
836 .addImm(0);
837 BuildMI(LoopBB, DL, TII->get(AArch64::Bcc))
839 .addMBB(LoopBB)
840 .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
841
842 LoopBB->addSuccessor(LoopBB);
843 LoopBB->addSuccessor(DoneBB);
844
845 DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
846 DoneBB->transferSuccessors(&MBB);
847
848 MBB.addSuccessor(LoopBB);
849
850 NextMBBI = MBB.end();
851 MI.eraseFromParent();
852 // Recompute liveness bottom up.
853 LivePhysRegs LiveRegs;
854 computeAndAddLiveIns(LiveRegs, *DoneBB);
855 computeAndAddLiveIns(LiveRegs, *LoopBB);
856 // Do an extra pass in the loop to get the loop carried dependencies right.
857 // FIXME: is this necessary?
858 LoopBB->clearLiveIns();
859 computeAndAddLiveIns(LiveRegs, *LoopBB);
860 DoneBB->clearLiveIns();
861 computeAndAddLiveIns(LiveRegs, *DoneBB);
862
863 return true;
864}
865
// Expand a multi-register SVE spill/fill pseudo into N consecutive
// single-register LDR/STR (Z or P register) instructions, one per tuple
// sub-register, at increasing immediate offsets from the same base.
// NOTE(review): doxygen extraction dropped original line 878 — the
// initializer of TRI (presumably MF->getSubtarget().getRegisterInfo();
// confirm against upstream).
866bool AArch64ExpandPseudoImpl::expandSVESpillFill(
867 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned Opc,
868 unsigned N) {
869 assert((Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI ||
870 Opc == AArch64::LDR_PXI || Opc == AArch64::STR_PXI) &&
871 "Unexpected opcode");
// Loads define their register operand; stores only read it.
872 RegState RState =
873 getDefRegState(Opc == AArch64::LDR_ZXI || Opc == AArch64::LDR_PXI);
// Z-register tuples index from zsub0, predicate tuples from psub0.
874 unsigned sub0 = (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI)
875 ? AArch64::zsub0
876 : AArch64::psub0;
877 const TargetRegisterInfo *TRI =
879 MachineInstr &MI = *MBBI;
880 for (unsigned Offset = 0; Offset < N; ++Offset) {
881 int ImmOffset = MI.getOperand(2).getImm() + Offset;
// Only the last emitted instruction may kill the base register.
882 bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
883 assert(ImmOffset >= -256 && ImmOffset < 256 &&
884 "Immediate spill offset out of range");
885 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
886 .addReg(TRI->getSubReg(MI.getOperand(0).getReg(), sub0 + Offset),
887 RState)
888 .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
889 .addImm(ImmOffset);
890 }
891 MI.eraseFromParent();
892 return true;
893}
894
895// Create a call with the passed opcode and explicit operands, copying over all
896// the implicit operands from *MBBI, starting at the regmask.
// NOTE(review): doxygen extraction dropped original lines 897-898 — the
// function's opening signature lines (the static return type / name and the
// MBB/MBBI parameters) — and line 914, the start of the
// Call->addOperand(MachineOperand::CreateReg(...)) statement whose argument
// list continues on lines 915-916 below.
899 const AArch64InstrInfo *TII,
900 unsigned Opcode,
901 ArrayRef<MachineOperand> ExplicitOps,
902 unsigned RegMaskStartIdx) {
903 // Build the MI, with explicit operands first (including the call target).
904 MachineInstr *Call = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(Opcode))
905 .add(ExplicitOps)
906 .getInstr();
907
908 // Register arguments are added during ISel, but cannot be added as explicit
909 // operands of the branch as it expects to be B <target> which is only one
910 // operand. Instead they are implicit operands used by the branch.
911 while (!MBBI->getOperand(RegMaskStartIdx).isRegMask()) {
912 const MachineOperand &MOP = MBBI->getOperand(RegMaskStartIdx);
913 assert(MOP.isReg() && "can only add register operands");
915 MOP.getReg(), /*Def=*/false, /*Implicit=*/true, /*isKill=*/false,
916 /*isDead=*/false, /*isUndef=*/MOP.isUndef()));
917 RegMaskStartIdx++;
918 }
// Copy the regmask and everything after it (implicit defs/uses) verbatim.
919 for (const MachineOperand &MO :
920 llvm::drop_begin(MBBI->operands(), RegMaskStartIdx))
921 Call->addOperand(MO);
922
923 return Call;
924}
925
926// Create a call to CallTarget, copying over all the operands from *MBBI,
927// starting at the regmask.
// NOTE(review): doxygen extraction dropped original lines 928-929 — the
// function's opening signature lines (static return type / name and the
// MBB/MBBI parameters). The body below is intact.
930 const AArch64InstrInfo *TII,
931 MachineOperand &CallTarget,
932 unsigned RegMaskStartIdx) {
// Direct call (BL) for a global target, indirect (BLR) for a register.
933 unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
934
935 assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
936 "invalid operand for regular call");
937 return createCallWithOps(MBB, MBBI, TII, Opc, CallTarget, RegMaskStartIdx);
938}
939
// Expand BLR_RVMARKER / BLRA_RVMARKER (objc retainAutoreleasedReturnValue
// style attached calls): emit the real (possibly ptrauth) call, optionally
// the `mov x29, x29` marker, and a BL to the runtime helper, then bundle the
// whole sequence so later passes cannot separate it.
940bool AArch64ExpandPseudoImpl::expandCALL_RVMARKER(
941 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
942 // Expand CALL_RVMARKER pseudo to:
943 // - a branch to the call target, followed by
944 // - the special `mov x29, x29` marker, if necessary, and
945 // - another branch, to the runtime function
946 // Mark the sequence as bundle, to avoid passes moving other code in between.
947 MachineInstr &MI = *MBBI;
948 MachineOperand &RVTarget = MI.getOperand(0);
949 bool DoEmitMarker = MI.getOperand(1).getImm();
950 assert(RVTarget.isGlobal() && "invalid operand for attached call");
951
952 MachineInstr *OriginalCall = nullptr;
953
954 if (MI.getOpcode() == AArch64::BLRA_RVMARKER) {
955 // ptrauth call.
956 const MachineOperand &CallTarget = MI.getOperand(2);
957 const MachineOperand &Key = MI.getOperand(3);
958 const MachineOperand &IntDisc = MI.getOperand(4);
959 const MachineOperand &AddrDisc = MI.getOperand(5);
960
961 assert((Key.getImm() == AArch64PACKey::IA ||
962 Key.getImm() == AArch64PACKey::IB) &&
963 "Invalid auth call key");
964
965 MachineOperand Ops[] = {CallTarget, Key, IntDisc, AddrDisc};
966
967 OriginalCall = createCallWithOps(MBB, MBBI, TII, AArch64::BLRA, Ops,
968 /*RegMaskStartIdx=*/6);
969 } else {
970 assert(MI.getOpcode() == AArch64::BLR_RVMARKER && "unknown rvmarker MI");
971 OriginalCall = createCall(MBB, MBBI, TII, MI.getOperand(2),
972 // Regmask starts after the RV and call targets.
973 /*RegMaskStartIdx=*/3);
974 }
975
// The marker is encoded as `orr x29, xzr, x29` — the mov x29, x29 the
// runtime looks for between the call and the BL below.
976 if (DoEmitMarker)
977 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
978 .addReg(AArch64::FP, RegState::Define)
979 .addReg(AArch64::XZR)
980 .addReg(AArch64::FP)
981 .addImm(0);
982
983 auto *RVCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BL))
984 .add(RVTarget)
985 .getInstr();
986
// Keep call-site bookkeeping pointing at the real call, not the pseudo.
987 if (MI.shouldUpdateAdditionalCallInfo())
988 MBB.getParent()->moveAdditionalCallInfo(&MI, OriginalCall);
989
990 MI.eraseFromParent();
991 finalizeBundle(MBB, OriginalCall->getIterator(),
992 std::next(RVCall->getIterator()));
993 return true;
994}
995
996bool AArch64ExpandPseudoImpl::expandCALL_BTI(MachineBasicBlock &MBB,
998 // Expand CALL_BTI pseudo to:
999 // - a branch to the call target
1000 // - a BTI instruction
1001 // Mark the sequence as a bundle, to avoid passes moving other code in
1002 // between.
1003 MachineInstr &MI = *MBBI;
1004 MachineInstr *Call = createCall(MBB, MBBI, TII, MI.getOperand(0),
1005 // Regmask starts after the call target.
1006 /*RegMaskStartIdx=*/1);
1007
1008 Call->setCFIType(*MBB.getParent(), MI.getCFIType());
1009
1010 MachineInstr *BTI =
1011 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT))
1012 // BTI J so that setjmp can to BR to this.
1013 .addImm(36)
1014 .getInstr();
1015
1016 if (MI.shouldUpdateAdditionalCallInfo())
1018
1019 MI.eraseFromParent();
1020 finalizeBundle(MBB, Call->getIterator(), std::next(BTI->getIterator()));
1021 return true;
1022}
1023
1024bool AArch64ExpandPseudoImpl::expandStoreSwiftAsyncContext(
1025 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
1026 Register CtxReg = MBBI->getOperand(0).getReg();
1027 Register BaseReg = MBBI->getOperand(1).getReg();
1028 int Offset = MBBI->getOperand(2).getImm();
1029 DebugLoc DL(MBBI->getDebugLoc());
1030 auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();
1031
1032 if (STI.getTargetTriple().getArchName() != "arm64e") {
1033 BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
1034 .addUse(CtxReg)
1035 .addUse(BaseReg)
1036 .addImm(Offset / 8)
1039 return true;
1040 }
1041
1042 // We need to sign the context in an address-discriminated way. 0xc31a is a
1043 // fixed random value, chosen as part of the ABI.
1044 // add x16, xBase, #Offset
1045 // movk x16, #0xc31a, lsl #48
1046 // mov x17, x22/xzr
1047 // pacdb x17, x16
1048 // str x17, [xBase, #Offset]
1049 unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
1050 BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
1051 .addUse(BaseReg)
1052 .addImm(abs(Offset))
1053 .addImm(0)
1055 BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
1056 .addUse(AArch64::X16)
1057 .addImm(0xc31a)
1058 .addImm(48)
1060 // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried), so
1061 // move it somewhere before signing.
1062 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
1063 .addUse(AArch64::XZR)
1064 .addUse(CtxReg)
1065 .addImm(0)
1067 BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
1068 .addUse(AArch64::X17)
1069 .addUse(AArch64::X16)
1071 BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
1072 .addUse(AArch64::X17)
1073 .addUse(BaseReg)
1074 .addImm(Offset / 8)
1076
1078 return true;
1079}
1080
1081bool AArch64ExpandPseudoImpl::expandSTSHHAtomicStore(
1082 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
1083 MachineInstr &MI = *MBBI;
1084 DebugLoc DL(MI.getDebugLoc());
1085
1086 unsigned Order = MI.getOperand(2).getImm();
1087 unsigned Policy = MI.getOperand(3).getImm();
1088 unsigned Size = MI.getOperand(4).getImm();
1089
1090 bool IsRelaxed = Order == 0;
1091 unsigned StoreOpc = 0;
1092
1093 // __ATOMIC_RELAXED uses STR. __ATOMIC_{RELEASE/SEQ_CST} use STLR.
1094 switch (Size) {
1095 case 8:
1096 StoreOpc = IsRelaxed ? AArch64::STRBBui : AArch64::STLRB;
1097 break;
1098 case 16:
1099 StoreOpc = IsRelaxed ? AArch64::STRHHui : AArch64::STLRH;
1100 break;
1101 case 32:
1102 StoreOpc = IsRelaxed ? AArch64::STRWui : AArch64::STLRW;
1103 break;
1104 case 64:
1105 StoreOpc = IsRelaxed ? AArch64::STRXui : AArch64::STLRX;
1106 break;
1107 default:
1108 llvm_unreachable("Unexpected STSHH atomic store size");
1109 }
1110
1111 // Emit the hint with the retention policy immediate.
1112 MachineInstr *Hint = BuildMI(MBB, MBBI, DL, TII->get(AArch64::STSHH))
1113 .addImm(Policy)
1114 .getInstr();
1115
1116 // Emit the associated store instruction.
1117 Register ValReg = MI.getOperand(0).getReg();
1118
1119 if (Size < 64) {
1120 const TargetRegisterInfo *TRI =
1122 Register SubReg = TRI->getSubReg(ValReg, AArch64::sub_32);
1123 if (SubReg)
1124 ValReg = SubReg;
1125 }
1126
1127 MachineInstrBuilder Store = BuildMI(MBB, MBBI, DL, TII->get(StoreOpc))
1128 .addReg(ValReg)
1129 .add(MI.getOperand(1));
1130
1131 // Relaxed uses base+imm addressing with a zero offset.
1132 if (IsRelaxed)
1133 Store.addImm(0);
1134
1135 // Preserve memory operands and any implicit uses/defs.
1136 Store->setMemRefs(*MBB.getParent(), MI.memoperands());
1137 transferImpOps(MI, Store, Store);
1138
1139 // Bundle the hint and store so they remain adjacent.
1140 finalizeBundle(MBB, Hint->getIterator(), std::next(Store->getIterator()));
1141
1142 MI.eraseFromParent();
1143 return true;
1144}
1145
// Split the block around a conditional pseudo so it can be lowered to a
// conditionally-executed sequence:
//   MBB:    ... ; <Branch> CondBB ; b EndBB
//   CondBB: <expansion goes here> ; b EndBB
//   EndBB:  everything after the pseudo
// Returns {CondBB, EndBB}; callers insert their expansion before
// CondBB.back() (i.e. before the trailing branch).
AArch64ExpandPseudoImpl::ConditionalBlocks
AArch64ExpandPseudoImpl::expandConditionalPseudo(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL,
    MachineInstrBuilder &Branch) {
  assert((std::next(MBBI) != MBB.end() ||
          MBB.successors().begin() != MBB.successors().end()) &&
         "Unexpected unreachable in block");

  // Split MBB and create two new blocks:
  // - MBB now contains all instructions before the conditional pseudo.
  // - CondBB contains the conditional pseudo instruction only.
  // - EndBB contains all instructions after the conditional pseudo.
  MachineInstr &PrevMI = *std::prev(MBBI);
  MachineBasicBlock *CondBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
  // If the pseudo is the last instruction of its block, reuse the existing
  // fall-through successor as EndBB instead of splitting again.
  MachineBasicBlock *EndBB =
      std::next(MBBI) == CondBB->end()
          ? *CondBB->successors().begin()
          : CondBB->splitAt(*MBBI, /*UpdateLiveIns*/ true);

  // Add the SMBB label to the branch instruction & create a branch to EndBB.
  Branch.addMBB(CondBB);
  BuildMI(&MBB, DL, TII->get(AArch64::B))
      .addMBB(EndBB);
  MBB.addSuccessor(EndBB);

  // Create branch from CondBB to EndBB. Users of this helper should insert new
  // instructions at CondBB.back() -- i.e. before the branch.
  BuildMI(CondBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
  return {*CondBB, *EndBB};
}
1176
1177MachineBasicBlock *
1178AArch64ExpandPseudoImpl::expandRestoreZASave(MachineBasicBlock &MBB,
1180 MachineInstr &MI = *MBBI;
1181 DebugLoc DL = MI.getDebugLoc();
1182
1183 // Compare TPIDR2_EL0 against 0. Restore ZA if TPIDR2_EL0 is zero.
1184 MachineInstrBuilder Branch =
1185 BuildMI(MBB, MBBI, DL, TII->get(AArch64::CBZX)).add(MI.getOperand(0));
1186
1187 auto [CondBB, EndBB] = expandConditionalPseudo(MBB, MBBI, DL, Branch);
1188 // Replace the pseudo with a call (BL).
1189 MachineInstrBuilder MIB =
1190 BuildMI(CondBB, CondBB.back(), DL, TII->get(AArch64::BL));
1191 // Copy operands (mainly the regmask) from the pseudo.
1192 for (unsigned I = 2; I < MI.getNumOperands(); ++I)
1193 MIB.add(MI.getOperand(I));
1194 // Mark the TPIDR2 block pointer (X0) as an implicit use.
1195 MIB.addReg(MI.getOperand(1).getReg(), RegState::Implicit);
1196
1197 MI.eraseFromParent();
1198 return &EndBB;
1199}
1200
// Immediate mask for the SME ZERO instruction selecting all eight tile
// slices, i.e. clearing the whole of ZA.
static constexpr unsigned ZERO_ALL_ZA_MASK = 0b11111111;
1202
1204AArch64ExpandPseudoImpl::expandCommitZASave(MachineBasicBlock &MBB,
1206 MachineInstr &MI = *MBBI;
1207 DebugLoc DL = MI.getDebugLoc();
1208 [[maybe_unused]] auto *RI = MBB.getParent()->getSubtarget().getRegisterInfo();
1209
1210 // Compare TPIDR2_EL0 against 0. Commit ZA if TPIDR2_EL0 is non-zero.
1211 MachineInstrBuilder Branch =
1212 BuildMI(MBB, MBBI, DL, TII->get(AArch64::CBNZX)).add(MI.getOperand(0));
1213
1214 auto [CondBB, EndBB] = expandConditionalPseudo(MBB, MBBI, DL, Branch);
1215 // Replace the pseudo with a call (BL).
1217 BuildMI(CondBB, CondBB.back(), DL, TII->get(AArch64::BL));
1218 // Copy operands (mainly the regmask) from the pseudo.
1219 for (unsigned I = 3; I < MI.getNumOperands(); ++I)
1220 MIB.add(MI.getOperand(I));
1221 // Clear TPIDR2_EL0.
1222 BuildMI(CondBB, CondBB.back(), DL, TII->get(AArch64::MSR))
1223 .addImm(AArch64SysReg::TPIDR2_EL0)
1224 .addReg(AArch64::XZR);
1225 bool ZeroZA = MI.getOperand(1).getImm() != 0;
1226 bool ZeroZT0 = MI.getOperand(2).getImm() != 0;
1227 if (ZeroZA) {
1228 assert(MI.definesRegister(AArch64::ZAB0, RI) && "should define ZA!");
1229 BuildMI(CondBB, CondBB.back(), DL, TII->get(AArch64::ZERO_M))
1231 .addDef(AArch64::ZAB0, RegState::ImplicitDefine);
1232 }
1233 if (ZeroZT0) {
1234 assert(MI.definesRegister(AArch64::ZT0, RI) && "should define ZT0!");
1235 BuildMI(CondBB, CondBB.back(), DL, TII->get(AArch64::ZERO_T))
1236 .addDef(AArch64::ZT0);
1237 }
1238
1239 MI.eraseFromParent();
1240 return &EndBB;
1241}
1242
1243MachineBasicBlock *
1244AArch64ExpandPseudoImpl::expandCondSMToggle(MachineBasicBlock &MBB,
1246 MachineInstr &MI = *MBBI;
1247 // In the case of a smstart/smstop before a unreachable, just remove the pseudo.
1248 // Exception handling code generated by Clang may introduce unreachables and it
1249 // seems unnecessary to restore pstate.sm when that happens. Note that it is
1250 // not just an optimisation, the code below expects a successor instruction/block
1251 // in order to split the block at MBBI.
1252 if (std::next(MBBI) == MBB.end() &&
1253 MI.getParent()->successors().begin() ==
1254 MI.getParent()->successors().end()) {
1255 MI.eraseFromParent();
1256 return &MBB;
1257 }
1258
1259 // Expand the pseudo into smstart or smstop instruction. The pseudo has the
1260 // following operands:
1261 //
1262 // MSRpstatePseudo <za|sm|both>, <0|1>, condition[, pstate.sm], <regmask>
1263 //
1264 // The pseudo is expanded into a conditional smstart/smstop, with a
1265 // check if pstate.sm (register) equals the expected value, and if not,
1266 // invokes the smstart/smstop.
1267 //
1268 // As an example, the following block contains a normal call from a
1269 // streaming-compatible function:
1270 //
1271 // OrigBB:
1272 // MSRpstatePseudo 3, 0, IfCallerIsStreaming, %0, <regmask> <- Cond SMSTOP
1273 // bl @normal_callee
1274 // MSRpstatePseudo 3, 1, IfCallerIsStreaming, %0, <regmask> <- Cond SMSTART
1275 //
1276 // ...which will be transformed into:
1277 //
1278 // OrigBB:
1279 // TBNZx %0:gpr64, 0, SMBB
1280 // b EndBB
1281 //
1282 // SMBB:
1283 // MSRpstatesvcrImm1 3, 0, <regmask> <- SMSTOP
1284 //
1285 // EndBB:
1286 // bl @normal_callee
1287 // MSRcond_pstatesvcrImm1 3, 1, <regmask> <- SMSTART
1288 //
1289 DebugLoc DL = MI.getDebugLoc();
1290
1291 // Create the conditional branch based on the third operand of the
1292 // instruction, which tells us if we are wrapping a normal or streaming
1293 // function.
1294 // We test the live value of pstate.sm and toggle pstate.sm if this is not the
1295 // expected value for the callee (0 for a normal callee and 1 for a streaming
1296 // callee).
1297 unsigned Opc;
1298 switch (MI.getOperand(2).getImm()) {
1299 case AArch64SME::Always:
1300 llvm_unreachable("Should have matched to instruction directly");
1302 Opc = AArch64::TBNZW;
1303 break;
1305 Opc = AArch64::TBZW;
1306 break;
1307 }
1308 auto PStateSM = MI.getOperand(3).getReg();
1310 unsigned SMReg32 = TRI->getSubReg(PStateSM, AArch64::sub_32);
1311 MachineInstrBuilder Tbx =
1312 BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(SMReg32).addImm(0);
1313
1314 auto [CondBB, EndBB] = expandConditionalPseudo(MBB, MBBI, DL, Tbx);
1315 // Create the SMSTART/SMSTOP (MSRpstatesvcrImm1) instruction in SMBB.
1316 MachineInstrBuilder MIB = BuildMI(CondBB, CondBB.back(), MI.getDebugLoc(),
1317 TII->get(AArch64::MSRpstatesvcrImm1));
1318 // Copy all but the second and third operands of MSRcond_pstatesvcrImm1 (as
1319 // these contain the CopyFromReg for the first argument and the flag to
1320 // indicate whether the callee is streaming or normal).
1321 MIB.add(MI.getOperand(0));
1322 MIB.add(MI.getOperand(1));
1323 for (unsigned i = 4; i < MI.getNumOperands(); ++i)
1324 MIB.add(MI.getOperand(i));
1325
1326 MI.eraseFromParent();
1327 return &EndBB;
1328}
1329
1330bool AArch64ExpandPseudoImpl::expandMultiVecPseudo(
1331 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
1332 TargetRegisterClass ContiguousClass, TargetRegisterClass StridedClass,
1333 unsigned ContiguousOp, unsigned StridedOpc) {
1334 MachineInstr &MI = *MBBI;
1335 Register Tuple = MI.getOperand(0).getReg();
1336
1337 auto ContiguousRange = ContiguousClass.getRegisters();
1338 auto StridedRange = StridedClass.getRegisters();
1339 unsigned Opc;
1340 if (llvm::is_contained(ContiguousRange, Tuple.asMCReg())) {
1341 Opc = ContiguousOp;
1342 } else if (llvm::is_contained(StridedRange, Tuple.asMCReg())) {
1343 Opc = StridedOpc;
1344 } else
1345 llvm_unreachable("Cannot expand Multi-Vector pseudo");
1346
1347 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
1348 .add(MI.getOperand(0))
1349 .add(MI.getOperand(1))
1350 .add(MI.getOperand(2))
1351 .add(MI.getOperand(3));
1352 transferImpOps(MI, MIB, MIB);
1353 MI.eraseFromParent();
1354 return true;
1355}
1356
1357bool AArch64ExpandPseudoImpl::expandFormTuplePseudo(
1358 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
1359 MachineBasicBlock::iterator &NextMBBI, unsigned Size) {
1360 assert((Size == 2 || Size == 4) && "Invalid Tuple Size");
1361 MachineInstr &MI = *MBBI;
1362 Register ReturnTuple = MI.getOperand(0).getReg();
1363
1364 const TargetRegisterInfo *TRI =
1366 for (unsigned I = 0; I < Size; ++I) {
1367 Register FormTupleOpReg = MI.getOperand(I + 1).getReg();
1368 Register ReturnTupleSubReg =
1369 TRI->getSubReg(ReturnTuple, AArch64::zsub0 + I);
1370 // Add copies to ensure the subregisters remain in the correct order
1371 // for any contigious operation they are used by.
1372 if (FormTupleOpReg != ReturnTupleSubReg)
1373 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORR_ZZZ))
1374 .addReg(ReturnTupleSubReg, RegState::Define)
1375 .addReg(FormTupleOpReg)
1376 .addReg(FormTupleOpReg);
1377 }
1378
1379 MI.eraseFromParent();
1380 return true;
1381}
1382
1383/// If MBBI references a pseudo instruction that should be expanded here,
1384/// do the expansion and return true. Otherwise return false.
1385bool AArch64ExpandPseudoImpl::expandMI(MachineBasicBlock &MBB,
1387 MachineBasicBlock::iterator &NextMBBI) {
1388 MachineInstr &MI = *MBBI;
1389 unsigned Opcode = MI.getOpcode();
1390
1391 // Check if we can expand the destructive op
1392 int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
1393 if (OrigInstr != -1) {
1394 auto &Orig = TII->get(OrigInstr);
1395 if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask) !=
1397 return expand_DestructiveOp(MI, MBB, MBBI);
1398 }
1399 }
1400
1401 switch (Opcode) {
1402 default:
1403 break;
1404
1405 case AArch64::BSPv8i8:
1406 case AArch64::BSPv16i8: {
1407 Register DstReg = MI.getOperand(0).getReg();
1408 if (DstReg == MI.getOperand(3).getReg()) {
1409 // Expand to BIT
1410 auto I = BuildMI(MBB, MBBI, MI.getDebugLoc(),
1411 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
1412 : AArch64::BITv16i8))
1413 .add(MI.getOperand(0))
1414 .add(MI.getOperand(3))
1415 .add(MI.getOperand(2))
1416 .add(MI.getOperand(1));
1417 transferImpOps(MI, I, I);
1418 } else if (DstReg == MI.getOperand(2).getReg()) {
1419 // Expand to BIF
1420 auto I = BuildMI(MBB, MBBI, MI.getDebugLoc(),
1421 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
1422 : AArch64::BIFv16i8))
1423 .add(MI.getOperand(0))
1424 .add(MI.getOperand(2))
1425 .add(MI.getOperand(3))
1426 .add(MI.getOperand(1));
1427 transferImpOps(MI, I, I);
1428 } else {
1429 // Expand to BSL, use additional move if required
1430 if (DstReg == MI.getOperand(1).getReg()) {
1431 auto I =
1432 BuildMI(MBB, MBBI, MI.getDebugLoc(),
1433 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
1434 : AArch64::BSLv16i8))
1435 .add(MI.getOperand(0))
1436 .add(MI.getOperand(1))
1437 .add(MI.getOperand(2))
1438 .add(MI.getOperand(3));
1439 transferImpOps(MI, I, I);
1440 } else {
1442 getRenamableRegState(MI.getOperand(1).isRenamable()) |
1444 MI.getOperand(1).isKill() &&
1445 MI.getOperand(1).getReg() != MI.getOperand(2).getReg() &&
1446 MI.getOperand(1).getReg() != MI.getOperand(3).getReg());
1447 BuildMI(MBB, MBBI, MI.getDebugLoc(),
1448 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
1449 : AArch64::ORRv16i8))
1450 .addReg(DstReg,
1451 RegState::Define |
1452 getRenamableRegState(MI.getOperand(0).isRenamable()))
1453 .addReg(MI.getOperand(1).getReg(), RegState)
1454 .addReg(MI.getOperand(1).getReg(), RegState);
1455 auto I2 =
1456 BuildMI(MBB, MBBI, MI.getDebugLoc(),
1457 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
1458 : AArch64::BSLv16i8))
1459 .add(MI.getOperand(0))
1460 .addReg(DstReg,
1461 RegState::Kill | getRenamableRegState(
1462 MI.getOperand(0).isRenamable()))
1463 .add(MI.getOperand(2))
1464 .add(MI.getOperand(3));
1465 transferImpOps(MI, I2, I2);
1466 }
1467 }
1468 MI.eraseFromParent();
1469 return true;
1470 }
1471
1472 case AArch64::ADDWrr:
1473 case AArch64::SUBWrr:
1474 case AArch64::ADDXrr:
1475 case AArch64::SUBXrr:
1476 case AArch64::ADDSWrr:
1477 case AArch64::SUBSWrr:
1478 case AArch64::ADDSXrr:
1479 case AArch64::SUBSXrr:
1480 case AArch64::ANDWrr:
1481 case AArch64::ANDXrr:
1482 case AArch64::BICWrr:
1483 case AArch64::BICXrr:
1484 case AArch64::ANDSWrr:
1485 case AArch64::ANDSXrr:
1486 case AArch64::BICSWrr:
1487 case AArch64::BICSXrr:
1488 case AArch64::EONWrr:
1489 case AArch64::EONXrr:
1490 case AArch64::EORWrr:
1491 case AArch64::EORXrr:
1492 case AArch64::ORNWrr:
1493 case AArch64::ORNXrr:
1494 case AArch64::ORRWrr:
1495 case AArch64::ORRXrr: {
1496 unsigned Opcode;
1497 switch (MI.getOpcode()) {
1498 default:
1499 return false;
1500 case AArch64::ADDWrr: Opcode = AArch64::ADDWrs; break;
1501 case AArch64::SUBWrr: Opcode = AArch64::SUBWrs; break;
1502 case AArch64::ADDXrr: Opcode = AArch64::ADDXrs; break;
1503 case AArch64::SUBXrr: Opcode = AArch64::SUBXrs; break;
1504 case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break;
1505 case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break;
1506 case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break;
1507 case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break;
1508 case AArch64::ANDWrr: Opcode = AArch64::ANDWrs; break;
1509 case AArch64::ANDXrr: Opcode = AArch64::ANDXrs; break;
1510 case AArch64::BICWrr: Opcode = AArch64::BICWrs; break;
1511 case AArch64::BICXrr: Opcode = AArch64::BICXrs; break;
1512 case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break;
1513 case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break;
1514 case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break;
1515 case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break;
1516 case AArch64::EONWrr: Opcode = AArch64::EONWrs; break;
1517 case AArch64::EONXrr: Opcode = AArch64::EONXrs; break;
1518 case AArch64::EORWrr: Opcode = AArch64::EORWrs; break;
1519 case AArch64::EORXrr: Opcode = AArch64::EORXrs; break;
1520 case AArch64::ORNWrr: Opcode = AArch64::ORNWrs; break;
1521 case AArch64::ORNXrr: Opcode = AArch64::ORNXrs; break;
1522 case AArch64::ORRWrr: Opcode = AArch64::ORRWrs; break;
1523 case AArch64::ORRXrr: Opcode = AArch64::ORRXrs; break;
1524 }
1525 MachineFunction &MF = *MBB.getParent();
1526 // Try to create new inst without implicit operands added.
1527 MachineInstr *NewMI = MF.CreateMachineInstr(
1528 TII->get(Opcode), MI.getDebugLoc(), /*NoImplicit=*/true);
1529 MBB.insert(MBBI, NewMI);
1530 MachineInstrBuilder MIB1(MF, NewMI);
1531 MIB1->setPCSections(MF, MI.getPCSections());
1532 MIB1.addReg(MI.getOperand(0).getReg(), RegState::Define)
1533 .add(MI.getOperand(1))
1534 .add(MI.getOperand(2))
1536 transferImpOps(MI, MIB1, MIB1);
1537 if (auto DebugNumber = MI.peekDebugInstrNum())
1538 NewMI->setDebugInstrNum(DebugNumber);
1539 MI.eraseFromParent();
1540 return true;
1541 }
1542
1543 case AArch64::LOADgot: {
1544 MachineFunction *MF = MBB.getParent();
1545 Register DstReg = MI.getOperand(0).getReg();
1546 const MachineOperand &MO1 = MI.getOperand(1);
1547 unsigned Flags = MO1.getTargetFlags();
1548
1549 if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
1550 // Tiny codemodel expand to LDR
1551 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
1552 TII->get(AArch64::LDRXl), DstReg);
1553
1554 if (MO1.isGlobal()) {
1555 MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
1556 } else if (MO1.isSymbol()) {
1557 MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
1558 } else {
1559 assert(MO1.isCPI() &&
1560 "Only expect globals, externalsymbols, or constant pools");
1561 MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
1562 }
1563 } else {
1564 // Small codemodel expand into ADRP + LDR.
1565 MachineFunction &MF = *MI.getParent()->getParent();
1566 DebugLoc DL = MI.getDebugLoc();
1567 MachineInstrBuilder MIB1 =
1568 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
1569
1570 MachineInstrBuilder MIB2;
1571 if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
1573 unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
1574 MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
1575 .addDef(Reg32)
1576 .addReg(DstReg, RegState::Kill)
1577 .addReg(DstReg, RegState::Implicit);
1578 } else {
1579 Register DstReg = MI.getOperand(0).getReg();
1580 MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
1581 .add(MI.getOperand(0))
1582 .addUse(DstReg, RegState::Kill);
1583 }
1584
1585 if (MO1.isGlobal()) {
1586 MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
1587 MIB2.addGlobalAddress(MO1.getGlobal(), 0,
1589 } else if (MO1.isSymbol()) {
1591 MIB2.addExternalSymbol(MO1.getSymbolName(), Flags |
1594 } else {
1595 assert(MO1.isCPI() &&
1596 "Only expect globals, externalsymbols, or constant pools");
1597 MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
1598 Flags | AArch64II::MO_PAGE);
1599 MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
1600 Flags | AArch64II::MO_PAGEOFF |
1602 }
1603
1604 // If the LOADgot instruction has a debug-instr-number, annotate the
1605 // LDRWui instruction that it is expanded to with the same
1606 // debug-instr-number to preserve debug information.
1607 if (MI.peekDebugInstrNum() != 0)
1608 MIB2->setDebugInstrNum(MI.peekDebugInstrNum());
1609 transferImpOps(MI, MIB1, MIB2);
1610 }
1611 MI.eraseFromParent();
1612 return true;
1613 }
1614 case AArch64::MOVaddrBA: {
1615 MachineFunction &MF = *MI.getParent()->getParent();
1616 if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) {
1617 // blockaddress expressions have to come from a constant pool because the
1618 // largest addend (and hence offset within a function) allowed for ADRP is
1619 // only 8MB.
1620 const BlockAddress *BA = MI.getOperand(1).getBlockAddress();
1621 assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset");
1622
1623 MachineConstantPool *MCP = MF.getConstantPool();
1624 unsigned CPIdx = MCP->getConstantPoolIndex(BA, Align(8));
1625
1626 Register DstReg = MI.getOperand(0).getReg();
1627 auto MIB1 =
1628 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
1630 auto MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
1631 TII->get(AArch64::LDRXui), DstReg)
1632 .addUse(DstReg)
1635 transferImpOps(MI, MIB1, MIB2);
1636 MI.eraseFromParent();
1637 return true;
1638 }
1639 }
1640 [[fallthrough]];
1641 case AArch64::MOVaddr:
1642 case AArch64::MOVaddrJT:
1643 case AArch64::MOVaddrCP:
1644 case AArch64::MOVaddrTLS:
1645 case AArch64::MOVaddrEXT: {
1646 // Expand into ADRP + ADD.
1647 Register DstReg = MI.getOperand(0).getReg();
1648 assert(DstReg != AArch64::XZR);
1649 MachineInstrBuilder MIB1 =
1650 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
1651 .add(MI.getOperand(1));
1652
1653 if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
1654 // MO_TAGGED on the page indicates a tagged address. Set the tag now.
1655 // We do so by creating a MOVK that sets bits 48-63 of the register to
1656 // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
1657 // the small code model so we can assume a binary size of <= 4GB, which
1658 // makes the untagged PC relative offset positive. The binary must also be
1659 // loaded into address range [0, 2^48). Both of these properties need to
1660 // be ensured at runtime when using tagged addresses.
1661 auto Tag = MI.getOperand(1);
1662 Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
1663 Tag.setOffset(0x100000000);
1664 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
1665 .addReg(DstReg)
1666 .add(Tag)
1667 .addImm(48);
1668 }
1669
1670 MachineInstrBuilder MIB2 =
1671 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
1672 .add(MI.getOperand(0))
1673 .addReg(DstReg)
1674 .add(MI.getOperand(2))
1675 .addImm(0);
1676
1677 transferImpOps(MI, MIB1, MIB2);
1678 MI.eraseFromParent();
1679 return true;
1680 }
1681 case AArch64::ADDlowTLS:
1682 // Produce a plain ADD
1683 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
1684 .add(MI.getOperand(0))
1685 .add(MI.getOperand(1))
1686 .add(MI.getOperand(2))
1687 .addImm(0);
1688 MI.eraseFromParent();
1689 return true;
1690
1691 case AArch64::MOVbaseTLS: {
1692 Register DstReg = MI.getOperand(0).getReg();
1693 auto SysReg = AArch64SysReg::TPIDR_EL0;
1694 MachineFunction *MF = MBB.getParent();
1695 if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
1696 SysReg = AArch64SysReg::TPIDR_EL3;
1697 else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
1698 SysReg = AArch64SysReg::TPIDR_EL2;
1699 else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
1700 SysReg = AArch64SysReg::TPIDR_EL1;
1701 else if (MF->getSubtarget<AArch64Subtarget>().useROEL0ForTP())
1702 SysReg = AArch64SysReg::TPIDRRO_EL0;
1703 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
1704 .addImm(SysReg);
1705 MI.eraseFromParent();
1706 return true;
1707 }
1708
1709 case AArch64::MOVi32imm:
1710 return expandMOVImm(MBB, MBBI, 32);
1711 case AArch64::MOVi64imm:
1712 return expandMOVImm(MBB, MBBI, 64);
1713 case AArch64::RET_ReallyLR: {
1714 // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
1715 // function and missing live-ins. We are fine in practice because callee
1716 // saved register handling ensures the register value is restored before
1717 // RET, but we need the undef flag here to appease the MachineVerifier
1718 // liveness checks.
1719 MachineInstrBuilder MIB =
1720 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
1721 .addReg(AArch64::LR, RegState::Undef);
1722 transferImpOps(MI, MIB, MIB);
1723 MI.eraseFromParent();
1724 return true;
1725 }
1726 case AArch64::CMP_SWAP_8:
1727 return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
1728 AArch64::SUBSWrx,
1730 AArch64::WZR, NextMBBI);
1731 case AArch64::CMP_SWAP_16:
1732 return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
1733 AArch64::SUBSWrx,
1735 AArch64::WZR, NextMBBI);
1736 case AArch64::CMP_SWAP_32:
1737 return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
1738 AArch64::SUBSWrs,
1740 AArch64::WZR, NextMBBI);
1741 case AArch64::CMP_SWAP_64:
1742 return expandCMP_SWAP(MBB, MBBI,
1743 AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
1745 AArch64::XZR, NextMBBI);
1746 case AArch64::CMP_SWAP_128:
1747 case AArch64::CMP_SWAP_128_RELEASE:
1748 case AArch64::CMP_SWAP_128_ACQUIRE:
1749 case AArch64::CMP_SWAP_128_MONOTONIC:
1750 return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);
1751
1752 case AArch64::AESMCrrTied:
1753 case AArch64::AESIMCrrTied: {
1754 MachineInstrBuilder MIB =
1755 BuildMI(MBB, MBBI, MI.getDebugLoc(),
1756 TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr :
1757 AArch64::AESIMCrr))
1758 .add(MI.getOperand(0))
1759 .add(MI.getOperand(1));
1760 transferImpOps(MI, MIB, MIB);
1761 MI.eraseFromParent();
1762 return true;
1763 }
1764 case AArch64::IRGstack: {
1765 MachineFunction &MF = *MBB.getParent();
1766 const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1767 const AArch64FrameLowering *TFI =
1768 MF.getSubtarget<AArch64Subtarget>().getFrameLowering();
1769
1770 // IRG does not allow immediate offset. getTaggedBasePointerOffset should
1771 // almost always point to SP-after-prologue; if not, emit a longer
1772 // instruction sequence.
1773 int BaseOffset = -AFI->getTaggedBasePointerOffset();
1774 Register FrameReg;
1775 StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
1776 MF, BaseOffset, false /*isFixed*/, TargetStackID::Default /*StackID*/,
1777 FrameReg,
1778 /*PreferFP=*/false,
1779 /*ForSimm=*/true);
1780 Register SrcReg = FrameReg;
1781 if (FrameRegOffset) {
1782 // Use output register as temporary.
1783 SrcReg = MI.getOperand(0).getReg();
1784 emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
1785 FrameRegOffset, TII);
1786 }
1787 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
1788 .add(MI.getOperand(0))
1789 .addUse(SrcReg)
1790 .add(MI.getOperand(2));
1791 MI.eraseFromParent();
1792 return true;
1793 }
1794 case AArch64::TAGPstack: {
1795 int64_t Offset = MI.getOperand(2).getImm();
1796 BuildMI(MBB, MBBI, MI.getDebugLoc(),
1797 TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
1798 .add(MI.getOperand(0))
1799 .add(MI.getOperand(1))
1800 .addImm(std::abs(Offset))
1801 .add(MI.getOperand(4));
1802 MI.eraseFromParent();
1803 return true;
1804 }
1805 case AArch64::STGloop_wback:
1806 case AArch64::STZGloop_wback:
1807 return expandSetTagLoop(MBB, MBBI, NextMBBI);
1808 case AArch64::STGloop:
1809 case AArch64::STZGloop:
1811 "Non-writeback variants of STGloop / STZGloop should not "
1812 "survive past PrologEpilogInserter.");
1813 case AArch64::STR_ZZZZXI:
1814 case AArch64::STR_ZZZZXI_STRIDED_CONTIGUOUS:
1815 return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
1816 case AArch64::STR_ZZZXI:
1817 return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
1818 case AArch64::STR_ZZXI:
1819 case AArch64::STR_ZZXI_STRIDED_CONTIGUOUS:
1820 return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
1821 case AArch64::STR_PPXI:
1822 return expandSVESpillFill(MBB, MBBI, AArch64::STR_PXI, 2);
1823 case AArch64::LDR_ZZZZXI:
1824 case AArch64::LDR_ZZZZXI_STRIDED_CONTIGUOUS:
1825 return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
1826 case AArch64::LDR_ZZZXI:
1827 return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
1828 case AArch64::LDR_ZZXI:
1829 case AArch64::LDR_ZZXI_STRIDED_CONTIGUOUS:
1830 return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
1831 case AArch64::LDR_PPXI:
1832 return expandSVESpillFill(MBB, MBBI, AArch64::LDR_PXI, 2);
1833 case AArch64::BLR_RVMARKER:
1834 case AArch64::BLRA_RVMARKER:
1835 return expandCALL_RVMARKER(MBB, MBBI);
1836 case AArch64::BLR_BTI:
1837 return expandCALL_BTI(MBB, MBBI);
1838 case AArch64::StoreSwiftAsyncContext:
1839 return expandStoreSwiftAsyncContext(MBB, MBBI);
1840 case AArch64::STSHH_ATOMIC_STORE_SZ:
1841 return expandSTSHHAtomicStore(MBB, MBBI);
1842 case AArch64::RestoreZAPseudo:
1843 case AArch64::CommitZASavePseudo:
1844 case AArch64::MSRpstatePseudo: {
1845 auto *NewMBB = [&] {
1846 switch (Opcode) {
1847 case AArch64::RestoreZAPseudo:
1848 return expandRestoreZASave(MBB, MBBI);
1849 case AArch64::CommitZASavePseudo:
1850 return expandCommitZASave(MBB, MBBI);
1851 case AArch64::MSRpstatePseudo:
1852 return expandCondSMToggle(MBB, MBBI);
1853 default:
1854 llvm_unreachable("Unexpected conditional pseudo!");
1855 }
1856 }();
1857 if (NewMBB != &MBB)
1858 NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
1859 return true;
1860 }
1861 case AArch64::InOutZAUsePseudo:
1862 case AArch64::RequiresZASavePseudo:
1863 case AArch64::RequiresZT0SavePseudo:
1864 case AArch64::SMEStateAllocPseudo:
1865 case AArch64::COALESCER_BARRIER_FPR16:
1866 case AArch64::COALESCER_BARRIER_FPR32:
1867 case AArch64::COALESCER_BARRIER_FPR64:
1868 case AArch64::COALESCER_BARRIER_FPR128:
1869 MI.eraseFromParent();
1870 return true;
1871 case AArch64::LD1B_2Z_IMM_PSEUDO:
1872 return expandMultiVecPseudo(
1873 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1874 AArch64::LD1B_2Z_IMM, AArch64::LD1B_2Z_STRIDED_IMM);
1875 case AArch64::LD1H_2Z_IMM_PSEUDO:
1876 return expandMultiVecPseudo(
1877 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1878 AArch64::LD1H_2Z_IMM, AArch64::LD1H_2Z_STRIDED_IMM);
1879 case AArch64::LD1W_2Z_IMM_PSEUDO:
1880 return expandMultiVecPseudo(
1881 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1882 AArch64::LD1W_2Z_IMM, AArch64::LD1W_2Z_STRIDED_IMM);
1883 case AArch64::LD1D_2Z_IMM_PSEUDO:
1884 return expandMultiVecPseudo(
1885 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1886 AArch64::LD1D_2Z_IMM, AArch64::LD1D_2Z_STRIDED_IMM);
1887 case AArch64::LDNT1B_2Z_IMM_PSEUDO:
1888 return expandMultiVecPseudo(
1889 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1890 AArch64::LDNT1B_2Z_IMM, AArch64::LDNT1B_2Z_STRIDED_IMM);
1891 case AArch64::LDNT1H_2Z_IMM_PSEUDO:
1892 return expandMultiVecPseudo(
1893 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1894 AArch64::LDNT1H_2Z_IMM, AArch64::LDNT1H_2Z_STRIDED_IMM);
1895 case AArch64::LDNT1W_2Z_IMM_PSEUDO:
1896 return expandMultiVecPseudo(
1897 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1898 AArch64::LDNT1W_2Z_IMM, AArch64::LDNT1W_2Z_STRIDED_IMM);
1899 case AArch64::LDNT1D_2Z_IMM_PSEUDO:
1900 return expandMultiVecPseudo(
1901 MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1902 AArch64::LDNT1D_2Z_IMM, AArch64::LDNT1D_2Z_STRIDED_IMM);
1903 case AArch64::LD1B_2Z_PSEUDO:
1904 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1905 AArch64::ZPR2StridedRegClass, AArch64::LD1B_2Z,
1906 AArch64::LD1B_2Z_STRIDED);
1907 case AArch64::LD1H_2Z_PSEUDO:
1908 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1909 AArch64::ZPR2StridedRegClass, AArch64::LD1H_2Z,
1910 AArch64::LD1H_2Z_STRIDED);
1911 case AArch64::LD1W_2Z_PSEUDO:
1912 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1913 AArch64::ZPR2StridedRegClass, AArch64::LD1W_2Z,
1914 AArch64::LD1W_2Z_STRIDED);
1915 case AArch64::LD1D_2Z_PSEUDO:
1916 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1917 AArch64::ZPR2StridedRegClass, AArch64::LD1D_2Z,
1918 AArch64::LD1D_2Z_STRIDED);
1919 case AArch64::LDNT1B_2Z_PSEUDO:
1920 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1921 AArch64::ZPR2StridedRegClass,
1922 AArch64::LDNT1B_2Z, AArch64::LDNT1B_2Z_STRIDED);
1923 case AArch64::LDNT1H_2Z_PSEUDO:
1924 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1925 AArch64::ZPR2StridedRegClass,
1926 AArch64::LDNT1H_2Z, AArch64::LDNT1H_2Z_STRIDED);
1927 case AArch64::LDNT1W_2Z_PSEUDO:
1928 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1929 AArch64::ZPR2StridedRegClass,
1930 AArch64::LDNT1W_2Z, AArch64::LDNT1W_2Z_STRIDED);
1931 case AArch64::LDNT1D_2Z_PSEUDO:
1932 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1933 AArch64::ZPR2StridedRegClass,
1934 AArch64::LDNT1D_2Z, AArch64::LDNT1D_2Z_STRIDED);
1935 case AArch64::LD1B_4Z_IMM_PSEUDO:
1936 return expandMultiVecPseudo(
1937 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1938 AArch64::LD1B_4Z_IMM, AArch64::LD1B_4Z_STRIDED_IMM);
1939 case AArch64::LD1H_4Z_IMM_PSEUDO:
1940 return expandMultiVecPseudo(
1941 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1942 AArch64::LD1H_4Z_IMM, AArch64::LD1H_4Z_STRIDED_IMM);
1943 case AArch64::LD1W_4Z_IMM_PSEUDO:
1944 return expandMultiVecPseudo(
1945 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1946 AArch64::LD1W_4Z_IMM, AArch64::LD1W_4Z_STRIDED_IMM);
1947 case AArch64::LD1D_4Z_IMM_PSEUDO:
1948 return expandMultiVecPseudo(
1949 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1950 AArch64::LD1D_4Z_IMM, AArch64::LD1D_4Z_STRIDED_IMM);
1951 case AArch64::LDNT1B_4Z_IMM_PSEUDO:
1952 return expandMultiVecPseudo(
1953 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1954 AArch64::LDNT1B_4Z_IMM, AArch64::LDNT1B_4Z_STRIDED_IMM);
1955 case AArch64::LDNT1H_4Z_IMM_PSEUDO:
1956 return expandMultiVecPseudo(
1957 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1958 AArch64::LDNT1H_4Z_IMM, AArch64::LDNT1H_4Z_STRIDED_IMM);
1959 case AArch64::LDNT1W_4Z_IMM_PSEUDO:
1960 return expandMultiVecPseudo(
1961 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1962 AArch64::LDNT1W_4Z_IMM, AArch64::LDNT1W_4Z_STRIDED_IMM);
1963 case AArch64::LDNT1D_4Z_IMM_PSEUDO:
1964 return expandMultiVecPseudo(
1965 MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1966 AArch64::LDNT1D_4Z_IMM, AArch64::LDNT1D_4Z_STRIDED_IMM);
1967 case AArch64::LD1B_4Z_PSEUDO:
1968 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1969 AArch64::ZPR4StridedRegClass, AArch64::LD1B_4Z,
1970 AArch64::LD1B_4Z_STRIDED);
1971 case AArch64::LD1H_4Z_PSEUDO:
1972 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1973 AArch64::ZPR4StridedRegClass, AArch64::LD1H_4Z,
1974 AArch64::LD1H_4Z_STRIDED);
1975 case AArch64::LD1W_4Z_PSEUDO:
1976 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1977 AArch64::ZPR4StridedRegClass, AArch64::LD1W_4Z,
1978 AArch64::LD1W_4Z_STRIDED);
1979 case AArch64::LD1D_4Z_PSEUDO:
1980 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1981 AArch64::ZPR4StridedRegClass, AArch64::LD1D_4Z,
1982 AArch64::LD1D_4Z_STRIDED);
1983 case AArch64::LDNT1B_4Z_PSEUDO:
1984 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1985 AArch64::ZPR4StridedRegClass,
1986 AArch64::LDNT1B_4Z, AArch64::LDNT1B_4Z_STRIDED);
1987 case AArch64::LDNT1H_4Z_PSEUDO:
1988 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1989 AArch64::ZPR4StridedRegClass,
1990 AArch64::LDNT1H_4Z, AArch64::LDNT1H_4Z_STRIDED);
1991 case AArch64::LDNT1W_4Z_PSEUDO:
1992 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1993 AArch64::ZPR4StridedRegClass,
1994 AArch64::LDNT1W_4Z, AArch64::LDNT1W_4Z_STRIDED);
1995 case AArch64::LDNT1D_4Z_PSEUDO:
1996 return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1997 AArch64::ZPR4StridedRegClass,
1998 AArch64::LDNT1D_4Z, AArch64::LDNT1D_4Z_STRIDED);
1999 case AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO:
2000 return expandFormTuplePseudo(MBB, MBBI, NextMBBI, 2);
2001 case AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO:
2002 return expandFormTuplePseudo(MBB, MBBI, NextMBBI, 4);
2003 case AArch64::EON_ZZZ:
2004 case AArch64::NAND_ZZZ:
2005 case AArch64::NOR_ZZZ:
2006 return expandSVEBitwisePseudo(MI, MBB, MBBI);
2007 }
2008 return false;
2009}
2010
2011/// Iterate over the instructions in basic block MBB and expand any
2012/// pseudo instructions. Return true if anything was modified.
2013bool AArch64ExpandPseudoImpl::expandMBB(MachineBasicBlock &MBB) {
2014 bool Modified = false;
2015
2017 while (MBBI != E) {
2018 MachineBasicBlock::iterator NMBBI = std::next(MBBI);
2019 Modified |= expandMI(MBB, MBBI, NMBBI);
2020 MBBI = NMBBI;
2021 }
2022
2023 return Modified;
2024}
2025
2026bool AArch64ExpandPseudoImpl::run(MachineFunction &MF) {
2027 TII = MF.getSubtarget<AArch64Subtarget>().getInstrInfo();
2028
2029 bool Modified = false;
2030 for (auto &MBB : MF)
2031 Modified |= expandMBB(MBB);
2032 return Modified;
2033}
2034
2035bool AArch64ExpandPseudoLegacy::runOnMachineFunction(MachineFunction &MF) {
2036 return AArch64ExpandPseudoImpl().run(MF);
2037}
2038
2039/// Returns an instance of the pseudo instruction expansion pass.
2041 return new AArch64ExpandPseudoLegacy();
2042}
2043
2047 const bool Changed = AArch64ExpandPseudoImpl().run(MF);
2048 if (!Changed)
2049 return PreservedAnalyses::all();
2052 return PA;
2053}
#define AARCH64_EXPAND_PSEUDO_NAME
MachineInstrBuilder & UseMI
static MachineInstr * createCallWithOps(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const AArch64InstrInfo *TII, unsigned Opcode, ArrayRef< MachineOperand > ExplicitOps, unsigned RegMaskStartIdx)
static constexpr unsigned ZERO_ALL_ZA_MASK
static MachineInstr * createCall(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const AArch64InstrInfo *TII, MachineOperand &CallTarget, unsigned RegMaskStartIdx)
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define I(x, y, z)
Definition MD5.cpp:57
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
A debug info location.
Definition DebugLoc.h:123
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
Describe properties that are true of each instruction in the target description file.
LLVM_ABI instr_iterator insert(instr_iterator I, MachineInstr *M)
Insert MI into the instruction list before I, possibly inside a bundle.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
LLVM_ABI void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
LLVM_ABI MachineBasicBlock * splitAt(MachineInstr &SplitInst, bool UpdateLiveIns=true, LiveIntervals *LIS=nullptr)
Split a basic block into 2 pieces at SplitPoint.
LLVM_ABI void eraseFromParent()
This method unlinks 'this' from the containing function and deletes it.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
iterator_range< succ_iterator > successors()
MachineInstrBundleIterator< MachineInstr > iterator
unsigned getConstantPoolIndex(const Constant *C, Align Alignment)
getConstantPoolIndex - Create a new entry in the constant pool or return an existing one.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void moveAdditionalCallInfo(const MachineInstr *Old, const MachineInstr *New)
Move the call site info from Old to \New call site info.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
MachineConstantPool * getConstantPool()
getConstantPool - Return the constant pool object for the current function.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineInstr - Allocate a new MachineInstr.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addUse(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register use operand.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & setMIFlag(MachineInstr::MIFlag Flag) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addConstantPoolIndex(unsigned Idx, int Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addGlobalAddress(const GlobalValue *GV, int64_t Offset=0, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
void setDebugInstrNum(unsigned Num)
Set instruction number of this MachineInstr.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isCPI() const
isCPI - Tests if this is a MO_ConstantPoolIndex operand.
bool isSymbol() const
isSymbol - Tests if this is a MO_ExternalSymbol operand.
LLVM_ABI bool isRenamable() const
isRenamable - Returns true if this register may be renamed, i.e.
unsigned getTargetFlags() const
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
const char * getSymbolName() const
Register getReg() const
getReg - Returns the register number.
static MachineOperand CreateReg(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isEarlyClobber=false, unsigned SubReg=0, bool isDebug=false, bool isInternalRead=false, bool isRenamable=false)
int64_t getOffset() const
Return the offset from the symbol in this operand.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
MCRegister asMCReg() const
Utility to check-convert this value to a MCRegister.
Definition Register.h:107
void push_back(const T &Elt)
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
CodeModel::Model getCodeModel() const
Returns the code model.
ArrayRef< MCPhysReg > getRegisters() const
virtual const TargetRegisterInfo * getRegisterInfo() const =0
Return the target's register information.
self_iterator getIterator()
Definition ilist_node.h:123
IteratorT end() const
IteratorT begin() const
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ MO_NC
MO_NC - Indicates whether the linker is expected to check the symbol reference for overflow.
@ MO_PAGEOFF
MO_PAGEOFF - A symbol operand with this flag represents the offset of that symbol within a 4K page.
@ MO_PREL
MO_PREL - Indicates that the bits of the symbol operand represented by MO_G0 etc are PC relative.
@ MO_PAGE
MO_PAGE - A symbol operand with this flag represents the pc-relative offset of the 4K page containing...
@ MO_TAGGED
MO_TAGGED - With MO_PAGE, indicates that the page includes a memory tag in bits 56-63.
@ MO_G3
MO_G3 - A symbol operand with this flag (granule 3) represents the high 16-bits of a 64-bit address,...
static unsigned getArithExtendImm(AArch64_AM::ShiftExtendType ET, unsigned Imm)
getArithExtendImm - Encode the extend type and shift amount for an arithmetic instruction: imm: 3-bit...
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
int32_t getSVERevInstr(uint32_t Opcode)
int32_t getSVENonRevInstr(uint32_t Opcode)
int32_t getSVEPseudoMap(uint32_t Opcode)
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
BaseReg
Stack frame base register. Bit 0 of FREInfo.Info.
Definition SFrame.h:77
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
LLVM_ABI void finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
finalizeBundle - Finalize a machine instruction bundle which includes a sequence of instructions star...
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
RegState
Flags to represent properties of register accesses.
@ Kill
The last use of a register.
constexpr RegState getKillRegState(bool B)
APFloat abs(APFloat X)
Returns the absolute value of the argument.
Definition APFloat.h:1630
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
constexpr RegState getDeadRegState(bool B)
Op::Description Desc
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
FunctionPass * createAArch64ExpandPseudoLegacyPass()
Returns an instance of the pseudo instruction expansion pass.
constexpr RegState getRenamableRegState(bool B)
void emitFrameOffset(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const DebugLoc &DL, unsigned DestReg, unsigned SrcReg, StackOffset Offset, const TargetInstrInfo *TII, MachineInstr::MIFlag=MachineInstr::NoFlags, bool SetNZCV=false, bool NeedsWinCFI=false, bool *HasWinCFI=nullptr, bool EmitCFAOffset=false, StackOffset InitialOffset={}, unsigned FrameReg=AArch64::SP)
emitFrameOffset - Emit instructions as needed to set DestReg to SrcReg plus Offset.
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr RegState getDefRegState(bool B)
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
bool is_contained(R &&Range, const E &Element)
Returns true if Element is found in Range.
Definition STLExtras.h:1947
void computeAndAddLiveIns(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB)
Convenience function combining computeLiveIns() and addLiveIns().
constexpr RegState getUndefRegState(bool B)
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N