//===-- RISCVExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands atomic pseudo instructions into
// target instructions. This pass should be run at the last possible moment,
// avoiding the possibility for other passes to break the requirements for
// forward progress in the LR/SC block.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVInstrInfo.h"
#include "RISCVTargetMachine.h"

#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

using namespace llvm;

#define RISCV_EXPAND_ATOMIC_PSEUDO_NAME                                        \
  "RISC-V atomic pseudo instruction expansion pass"

namespace {

class RISCVExpandAtomicPseudo : public MachineFunctionPass {
public:
  const RISCVSubtarget *STI;
  const RISCVInstrInfo *TII;
  static char ID;

  RISCVExpandAtomicPseudo() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return RISCV_EXPAND_ATOMIC_PSEUDO_NAME;
  }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicBinOp(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp,
                         bool IsMasked, int Width,
                         MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicMinMaxOp(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            AtomicRMWInst::BinOp, bool IsMasked, int Width,
                            MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, bool IsMasked,
                           int Width, MachineBasicBlock::iterator &NextMBBI);
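
  // Debug-only helper that sums TII->getInstSizeInBytes over every
  // instruction in the function; runOnMachineFunction uses it to assert that
  // expansion never emits more bytes than the pseudos' Size fields claim.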
#ifndef NDEBUG
  unsigned getInstSizeInBytes(const MachineFunction &MF) const {
    unsigned Size = 0;
    for (auto &MBB : MF)
      for (auto &MI : MBB)
        Size += TII->getInstSizeInBytes(MI);
    return Size;
  }
#endif
};

char RISCVExpandAtomicPseudo::ID = 0;

bool RISCVExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
  STI = &MF.getSubtarget<RISCVSubtarget>();
  TII = STI->getInstrInfo();

#ifndef NDEBUG
  const unsigned OldSize = getInstSizeInBytes(MF);
#endif

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);

#ifndef NDEBUG
  const unsigned NewSize = getInstSizeInBytes(MF);
  assert(OldSize >= NewSize);
#endif
  return Modified;
}

bool RISCVExpandAtomicPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       MachineBasicBlock::iterator &NextMBBI) {
  // RISCVInstrInfo::getInstSizeInBytes expects that the total size of the
  // expanded instructions for each pseudo is correct in the Size field of the
  // tablegen definition for the pseudo.
  switch (MBBI->getOpcode()) {
  case RISCV::PseudoAtomicLoadNand32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
                             NextMBBI);
  case RISCV::PseudoAtomicLoadNand64:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64,
                             NextMBBI);
  case RISCV::PseudoMaskedAtomicSwap32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
                             NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadAdd32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, true, 32, NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadSub32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, true, 32, NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadNand32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, true, 32,
                             NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadMax32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32,
                                NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadMin32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, true, 32,
                                NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadUMax32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
                                NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadUMin32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32,
                                NextMBBI);
  case RISCV::PseudoCmpXchg32:
    return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI);
  case RISCV::PseudoCmpXchg64:
    return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
  case RISCV::PseudoMaskedCmpXchg32:
    return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
  }

  return false;
}

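// Note: on subtargets implementing the Ztso extension, ordinary loads and
// stores are already totally store ordered, so the helpers below only keep
// the explicit .aq/.rl annotations for sequentially consistent accesses.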
static unsigned getLRForRMW32(AtomicOrdering Ordering,
                              const RISCVSubtarget *Subtarget) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::LR_W;
  case AtomicOrdering::Acquire:
    if (Subtarget->hasStdExtZtso())
      return RISCV::LR_W;
    return RISCV::LR_W_AQ;
  case AtomicOrdering::Release:
    return RISCV::LR_W;
  case AtomicOrdering::AcquireRelease:
    if (Subtarget->hasStdExtZtso())
      return RISCV::LR_W;
    return RISCV::LR_W_AQ;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::LR_W_AQ_RL;
  }
}

static unsigned getSCForRMW32(AtomicOrdering Ordering,
                              const RISCVSubtarget *Subtarget) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::SC_W;
  case AtomicOrdering::Acquire:
    return RISCV::SC_W;
  case AtomicOrdering::Release:
    if (Subtarget->hasStdExtZtso())
      return RISCV::SC_W;
    return RISCV::SC_W_RL;
  case AtomicOrdering::AcquireRelease:
    if (Subtarget->hasStdExtZtso())
      return RISCV::SC_W;
    return RISCV::SC_W_RL;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::SC_W_RL;
  }
}

static unsigned getLRForRMW64(AtomicOrdering Ordering,
                              const RISCVSubtarget *Subtarget) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::LR_D;
  case AtomicOrdering::Acquire:
    if (Subtarget->hasStdExtZtso())
      return RISCV::LR_D;
    return RISCV::LR_D_AQ;
  case AtomicOrdering::Release:
    return RISCV::LR_D;
  case AtomicOrdering::AcquireRelease:
    if (Subtarget->hasStdExtZtso())
      return RISCV::LR_D;
    return RISCV::LR_D_AQ;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::LR_D_AQ_RL;
  }
}

static unsigned getSCForRMW64(AtomicOrdering Ordering,
                              const RISCVSubtarget *Subtarget) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::SC_D;
  case AtomicOrdering::Acquire:
    return RISCV::SC_D;
  case AtomicOrdering::Release:
    if (Subtarget->hasStdExtZtso())
      return RISCV::SC_D;
    return RISCV::SC_D_RL;
  case AtomicOrdering::AcquireRelease:
    if (Subtarget->hasStdExtZtso())
      return RISCV::SC_D;
    return RISCV::SC_D_RL;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::SC_D_RL;
  }
}

static unsigned getLRForRMW(AtomicOrdering Ordering, int Width,
                            const RISCVSubtarget *Subtarget) {
  if (Width == 32)
    return getLRForRMW32(Ordering, Subtarget);
  if (Width == 64)
    return getLRForRMW64(Ordering, Subtarget);
  llvm_unreachable("Unexpected LR width\n");
}

static unsigned getSCForRMW(AtomicOrdering Ordering, int Width,
                            const RISCVSubtarget *Subtarget) {
  if (Width == 32)
    return getSCForRMW32(Ordering, Subtarget);
  if (Width == 64)
    return getSCForRMW64(Ordering, Subtarget);
  llvm_unreachable("Unexpected SC width\n");
}

static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
                                   DebugLoc DL, MachineBasicBlock *ThisMBB,
                                   MachineBasicBlock *LoopMBB,
                                   MachineBasicBlock *DoneMBB,
                                   AtomicRMWInst::BinOp BinOp, int Width,
                                   const RISCVSubtarget *STI) {
  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register IncrReg = MI.getOperand(3).getReg();
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(4).getImm());

  // .loop:
  //   lr.[w|d] dest, (addr)
  //   binop scratch, dest, val
  //   sc.[w|d] scratch, scratch, (addr)
  //   bnez scratch, loop
  BuildMI(LoopMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)), DestReg)
      .addReg(AddrReg);
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Nand:
    BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
        .addReg(ScratchReg)
        .addImm(-1);
    break;
  }
  BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)), ScratchReg)
      .addReg(AddrReg)
      .addReg(ScratchReg);
  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(ScratchReg)
      .addReg(RISCV::X0)
      .addMBB(LoopMBB);
}

static void insertMaskedMerge(const RISCVInstrInfo *TII, DebugLoc DL,
                              MachineBasicBlock *MBB, Register DestReg,
                              Register OldValReg, Register NewValReg,
                              Register MaskReg, Register ScratchReg) {
  assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique");
  assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique");
  assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique");

  // We select bits from newval and oldval using:
  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
  // r = oldval ^ ((oldval ^ newval) & masktargetdata);
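  // For example, with oldval = 0xAABBCCDD, newval = 0x000000EE and
  // masktargetdata = 0x000000FF this yields r = 0xAABBCCEE: only the bits
  // selected by the mask are taken from newval.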
  BuildMI(MBB, DL, TII->get(RISCV::XOR), ScratchReg)
      .addReg(OldValReg)
      .addReg(NewValReg);
  BuildMI(MBB, DL, TII->get(RISCV::AND), ScratchReg)
      .addReg(ScratchReg)
      .addReg(MaskReg);
  BuildMI(MBB, DL, TII->get(RISCV::XOR), DestReg)
      .addReg(OldValReg)
      .addReg(ScratchReg);
}

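// The masked pseudos are emitted for atomics on sub-word (8- and 16-bit)
// values: the expansion below operates on the aligned 32-bit word containing
// the data, with MaskReg selecting the bits that belong to the operation so
// that only those bits are merged back into the word.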
static void doMaskedAtomicBinOpExpansion(const RISCVInstrInfo *TII,
                                         MachineInstr &MI, DebugLoc DL,
                                         MachineBasicBlock *ThisMBB,
                                         MachineBasicBlock *LoopMBB,
                                         MachineBasicBlock *DoneMBB,
                                         AtomicRMWInst::BinOp BinOp, int Width,
                                         const RISCVSubtarget *STI) {
  assert(Width == 32 && "Should never need to expand masked 64-bit operations");
  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register IncrReg = MI.getOperand(3).getReg();
  Register MaskReg = MI.getOperand(4).getReg();
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(5).getImm());

  // .loop:
  //   lr.w destreg, (alignedaddr)
  //   binop scratch, destreg, incr
  //   xor scratch, destreg, scratch
  //   and scratch, scratch, masktargetdata
  //   xor scratch, destreg, scratch
  //   sc.w scratch, scratch, (alignedaddr)
  //   bnez scratch, loop
  BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering, STI)), DestReg)
      .addReg(AddrReg);
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Xchg:
    BuildMI(LoopMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
        .addReg(IncrReg)
        .addImm(0);
    break;
  case AtomicRMWInst::Add:
    BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Sub:
    BuildMI(LoopMBB, DL, TII->get(RISCV::SUB), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Nand:
    BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
        .addReg(ScratchReg)
        .addImm(-1);
    break;
  }

  insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg,
                    ScratchReg);

  BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), ScratchReg)
      .addReg(AddrReg)
      .addReg(ScratchReg);
  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(ScratchReg)
      .addReg(RISCV::X0)
      .addMBB(LoopMBB);
}

bool RISCVExpandAtomicPseudo::expandAtomicBinOp(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();

  MachineFunction *MF = MBB.getParent();
  auto LoopMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopMBB);
  MF->insert(++LoopMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(DoneMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopMBB);

  if (!IsMasked)
    doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width,
                           STI);
  else
    doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp,
                                 Width, STI);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

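// Sign-extend the low XLEN-Shamt bits of ValReg in place: shifting left by
// ShamtReg moves the field's sign bit into the MSB, and the arithmetic shift
// right then replicates it through the upper bits.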
static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL,
                       MachineBasicBlock *MBB, Register ValReg,
                       Register ShamtReg) {
  BuildMI(MBB, DL, TII->get(RISCV::SLL), ValReg)
      .addReg(ValReg)
      .addReg(ShamtReg);
  BuildMI(MBB, DL, TII->get(RISCV::SRA), ValReg)
      .addReg(ValReg)
      .addReg(ShamtReg);
}

bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
    MachineBasicBlock::iterator &NextMBBI) {
  assert(IsMasked == true &&
         "Should only need to expand masked atomic max/min");
  assert(Width == 32 && "Should never need to expand masked 64-bit operations");

  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = MBB.getParent();
  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopHeadMBB);
  MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
  MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
  LoopHeadMBB->addSuccessor(LoopTailMBB);
  LoopIfBodyMBB->addSuccessor(LoopTailMBB);
  LoopTailMBB->addSuccessor(LoopHeadMBB);
  LoopTailMBB->addSuccessor(DoneMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopHeadMBB);

  Register DestReg = MI.getOperand(0).getReg();
  Register Scratch1Reg = MI.getOperand(1).getReg();
  Register Scratch2Reg = MI.getOperand(2).getReg();
  Register AddrReg = MI.getOperand(3).getReg();
  Register IncrReg = MI.getOperand(4).getReg();
  Register MaskReg = MI.getOperand(5).getReg();
  bool IsSigned = BinOp == AtomicRMWInst::Min || BinOp == AtomicRMWInst::Max;
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(IsSigned ? 7 : 6).getImm());

  //
  // .loophead:
  //   lr.w destreg, (alignedaddr)
  //   and scratch2, destreg, mask
  //   mv scratch1, destreg
  //   [sext scratch2 if signed min/max]
  //   ifnochangeneeded scratch2, incr, .looptail
  BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering, STI)), DestReg)
      .addReg(AddrReg);
  BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), Scratch2Reg)
      .addReg(DestReg)
      .addReg(MaskReg);
  BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), Scratch1Reg)
      .addReg(DestReg)
      .addImm(0);

  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Max: {
    insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
        .addReg(Scratch2Reg)
        .addReg(IncrReg)
        .addMBB(LoopTailMBB);
    break;
  }
  case AtomicRMWInst::Min: {
    insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
        .addReg(IncrReg)
        .addReg(Scratch2Reg)
        .addMBB(LoopTailMBB);
    break;
  }
  case AtomicRMWInst::UMax:
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
        .addReg(Scratch2Reg)
        .addReg(IncrReg)
        .addMBB(LoopTailMBB);
    break;
  case AtomicRMWInst::UMin:
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
        .addReg(IncrReg)
        .addReg(Scratch2Reg)
        .addMBB(LoopTailMBB);
    break;
  }

  // .loopifbody:
  //   xor scratch1, destreg, incr
  //   and scratch1, scratch1, mask
  //   xor scratch1, destreg, scratch1
  insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg,
                    MaskReg, Scratch1Reg);

  // .looptail:
  //   sc.w scratch1, scratch1, (addr)
  //   bnez scratch1, loop
  BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), Scratch1Reg)
      .addReg(AddrReg)
      .addReg(Scratch1Reg);
  BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
      .addReg(Scratch1Reg)
      .addReg(RISCV::X0)
      .addMBB(LoopHeadMBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
  computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

// If a BNE on the cmpxchg comparison result immediately follows the cmpxchg
// operation, it can be folded into the cmpxchg expansion by
// modifying the branch within 'LoopHead' (which performs the same
// comparison). This is a valid transformation because after altering the
// LoopHead's BNE destination, the BNE following the cmpxchg becomes
// redundant and can be deleted. In the case of a masked cmpxchg, an
// appropriate AND and BNE must be matched.
//
// On success, returns true and deletes the matching BNE or AND+BNE, sets the
// LoopHeadBNETarget argument to the target that should be used within the
// loop head, and removes that block as a successor to MBB.
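//
// For example (unmasked case, operands abbreviated), a sequence like
//   %dest = PseudoCmpXchg32 ...
//   BNE %dest, %cmpval, %bb.fail
// is rewritten so that the BNE emitted in the loop head branches directly to
// %bb.fail and the trailing BNE is deleted.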
bool tryToFoldBNEOnCmpXchgResult(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 Register DestReg, Register CmpValReg,
                                 Register MaskReg,
                                 MachineBasicBlock *&LoopHeadBNETarget) {
  SmallVector<MachineInstr *> ToErase;
  auto E = MBB.end();
  if (MBBI == E)
    return false;
  MBBI = skipDebugInstructionsForward(MBBI, E);

  // If we have a masked cmpxchg, match AND dst, DestReg, MaskReg.
  if (MaskReg.isValid()) {
    if (MBBI == E || MBBI->getOpcode() != RISCV::AND)
      return false;
    Register ANDOp1 = MBBI->getOperand(1).getReg();
    Register ANDOp2 = MBBI->getOperand(2).getReg();
    if (!(ANDOp1 == DestReg && ANDOp2 == MaskReg) &&
        !(ANDOp1 == MaskReg && ANDOp2 == DestReg))
      return false;
    // We now expect the BNE to use the result of the AND as an operand.
    DestReg = MBBI->getOperand(0).getReg();
    ToErase.push_back(&*MBBI);
    MBBI = skipDebugInstructionsForward(std::next(MBBI), E);
  }

  // Match BNE DestReg, CmpValReg.
  if (MBBI == E || MBBI->getOpcode() != RISCV::BNE)
    return false;
  Register BNEOp0 = MBBI->getOperand(0).getReg();
  Register BNEOp1 = MBBI->getOperand(1).getReg();
  if (!(BNEOp0 == DestReg && BNEOp1 == CmpValReg) &&
      !(BNEOp0 == CmpValReg && BNEOp1 == DestReg))
    return false;

  // Make sure the branch is the only user of the AND.
  if (MaskReg.isValid()) {
    if (BNEOp0 == DestReg && !MBBI->getOperand(0).isKill())
      return false;
    if (BNEOp1 == DestReg && !MBBI->getOperand(1).isKill())
      return false;
  }

  ToErase.push_back(&*MBBI);
  LoopHeadBNETarget = MBBI->getOperand(2).getMBB();
  MBBI = skipDebugInstructionsForward(std::next(MBBI), E);
  if (MBBI != E)
    return false;

  MBB.removeSuccessor(LoopHeadBNETarget);
  for (auto *MI : ToErase)
    MI->eraseFromParent();
  return true;
}

bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked,
    int Width, MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = MBB.getParent();
  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register CmpValReg = MI.getOperand(3).getReg();
  Register NewValReg = MI.getOperand(4).getReg();
  Register MaskReg = IsMasked ? MI.getOperand(5).getReg() : Register();

  MachineBasicBlock *LoopHeadBNETarget = DoneMBB;
  tryToFoldBNEOnCmpXchgResult(MBB, std::next(MBBI), DestReg, CmpValReg, MaskReg,
                              LoopHeadBNETarget);

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopHeadMBB);
  MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB);
  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopHeadMBB->addSuccessor(LoopTailMBB);
  LoopHeadMBB->addSuccessor(LoopHeadBNETarget);
  LoopTailMBB->addSuccessor(DoneMBB);
  LoopTailMBB->addSuccessor(LoopHeadMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopHeadMBB);

  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());

  if (!IsMasked) {
    // .loophead:
    //   lr.[w|d] dest, (addr)
    //   bne dest, cmpval, done
    BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)),
            DestReg)
        .addReg(AddrReg);
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
        .addReg(DestReg)
        .addReg(CmpValReg)
        .addMBB(LoopHeadBNETarget);
    // .looptail:
    //   sc.[w|d] scratch, newval, (addr)
    //   bnez scratch, loophead
    BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
            ScratchReg)
        .addReg(AddrReg)
        .addReg(NewValReg);
    BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
        .addReg(ScratchReg)
        .addReg(RISCV::X0)
        .addMBB(LoopHeadMBB);
  } else {
    // .loophead:
    //   lr.w dest, (addr)
    //   and scratch, dest, mask
    //   bne scratch, cmpval, done
    Register MaskReg = MI.getOperand(5).getReg();
    BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)),
            DestReg)
        .addReg(AddrReg);
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(MaskReg);
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
        .addReg(ScratchReg)
        .addReg(CmpValReg)
        .addMBB(LoopHeadBNETarget);

    // .looptail:
    //   xor scratch, dest, newval
    //   and scratch, scratch, mask
    //   xor scratch, dest, scratch
    //   sc.w scratch, scratch, (addr)
    //   bnez scratch, loophead
    insertMaskedMerge(TII, DL, LoopTailMBB, ScratchReg, DestReg, NewValReg,
                      MaskReg, ScratchReg);
    BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
            ScratchReg)
        .addReg(AddrReg)
        .addReg(ScratchReg);
    BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
        .addReg(ScratchReg)
        .addReg(RISCV::X0)
        .addMBB(LoopHeadMBB);
  }

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

} // end of anonymous namespace

INITIALIZE_PASS(RISCVExpandAtomicPseudo, "riscv-expand-atomic-pseudo",
                RISCV_EXPAND_ATOMIC_PSEUDO_NAME, false, false)

FunctionPass *llvm::createRISCVExpandAtomicPseudoPass() {
  return new RISCVExpandAtomicPseudo();
}