LLVM 20.0.0git
RISCVExpandAtomicPseudoInsts.cpp
Go to the documentation of this file.
1//===-- RISCVExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. ---===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass that expands atomic pseudo instructions into
10// target instructions. This pass should be run at the last possible moment,
11// avoiding the possibility for other passes to break the requirements for
12// forward progress in the LR/SC block.
13//
14//===----------------------------------------------------------------------===//
15
16#include "RISCV.h"
17#include "RISCVInstrInfo.h"
18#include "RISCVTargetMachine.h"
19
23
24using namespace llvm;
25
26#define RISCV_EXPAND_ATOMIC_PSEUDO_NAME \
27 "RISC-V atomic pseudo instruction expansion pass"
28
29namespace {
30
31class RISCVExpandAtomicPseudo : public MachineFunctionPass {
32public:
33 const RISCVSubtarget *STI;
34 const RISCVInstrInfo *TII;
35 static char ID;
36
37 RISCVExpandAtomicPseudo() : MachineFunctionPass(ID) {
39 }
40
41 bool runOnMachineFunction(MachineFunction &MF) override;
42
43 StringRef getPassName() const override {
45 }
46
47private:
48 bool expandMBB(MachineBasicBlock &MBB);
51 bool expandAtomicBinOp(MachineBasicBlock &MBB,
53 bool IsMasked, int Width,
55 bool expandAtomicMinMaxOp(MachineBasicBlock &MBB,
57 AtomicRMWInst::BinOp, bool IsMasked, int Width,
59 bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
60 MachineBasicBlock::iterator MBBI, bool IsMasked,
61 int Width, MachineBasicBlock::iterator &NextMBBI);
62#ifndef NDEBUG
63 unsigned getInstSizeInBytes(const MachineFunction &MF) const {
64 unsigned Size = 0;
65 for (auto &MBB : MF)
66 for (auto &MI : MBB)
67 Size += TII->getInstSizeInBytes(MI);
68 return Size;
69 }
70#endif
71};
72
73char RISCVExpandAtomicPseudo::ID = 0;
74
75bool RISCVExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
76 STI = &MF.getSubtarget<RISCVSubtarget>();
77 TII = STI->getInstrInfo();
78
79#ifndef NDEBUG
80 const unsigned OldSize = getInstSizeInBytes(MF);
81#endif
82
83 bool Modified = false;
84 for (auto &MBB : MF)
85 Modified |= expandMBB(MBB);
86
87#ifndef NDEBUG
88 const unsigned NewSize = getInstSizeInBytes(MF);
89 assert(OldSize >= NewSize);
90#endif
91 return Modified;
92}
93
94bool RISCVExpandAtomicPseudo::expandMBB(MachineBasicBlock &MBB) {
95 bool Modified = false;
96
98 while (MBBI != E) {
99 MachineBasicBlock::iterator NMBBI = std::next(MBBI);
100 Modified |= expandMI(MBB, MBBI, NMBBI);
101 MBBI = NMBBI;
102 }
103
104 return Modified;
105}
106
107bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB,
109 MachineBasicBlock::iterator &NextMBBI) {
110 // RISCVInstrInfo::getInstSizeInBytes expects that the total size of the
111 // expanded instructions for each pseudo is correct in the Size field of the
112 // tablegen definition for the pseudo.
113 switch (MBBI->getOpcode()) {
114 case RISCV::PseudoAtomicLoadNand32:
115 return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
116 NextMBBI);
117 case RISCV::PseudoAtomicLoadNand64:
118 return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64,
119 NextMBBI);
120 case RISCV::PseudoMaskedAtomicSwap32:
121 return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
122 NextMBBI);
123 case RISCV::PseudoMaskedAtomicLoadAdd32:
124 return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, true, 32, NextMBBI);
125 case RISCV::PseudoMaskedAtomicLoadSub32:
126 return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, true, 32, NextMBBI);
127 case RISCV::PseudoMaskedAtomicLoadNand32:
128 return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, true, 32,
129 NextMBBI);
130 case RISCV::PseudoMaskedAtomicLoadMax32:
131 return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32,
132 NextMBBI);
133 case RISCV::PseudoMaskedAtomicLoadMin32:
134 return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, true, 32,
135 NextMBBI);
136 case RISCV::PseudoMaskedAtomicLoadUMax32:
137 return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
138 NextMBBI);
139 case RISCV::PseudoMaskedAtomicLoadUMin32:
140 return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32,
141 NextMBBI);
142 case RISCV::PseudoCmpXchg32:
143 return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI);
144 case RISCV::PseudoCmpXchg64:
145 return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
146 case RISCV::PseudoMaskedCmpXchg32:
147 return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
148 }
149
150 return false;
151}
152
153static unsigned getLRForRMW32(AtomicOrdering Ordering,
154 const RISCVSubtarget *Subtarget) {
155 switch (Ordering) {
156 default:
157 llvm_unreachable("Unexpected AtomicOrdering");
158 case AtomicOrdering::Monotonic:
159 return RISCV::LR_W;
160 case AtomicOrdering::Acquire:
161 if (Subtarget->hasStdExtZtso())
162 return RISCV::LR_W;
163 return RISCV::LR_W_AQ;
164 case AtomicOrdering::Release:
165 return RISCV::LR_W;
166 case AtomicOrdering::AcquireRelease:
167 if (Subtarget->hasStdExtZtso())
168 return RISCV::LR_W;
169 return RISCV::LR_W_AQ;
170 case AtomicOrdering::SequentiallyConsistent:
171 return RISCV::LR_W_AQ_RL;
172 }
173}
174
175static unsigned getSCForRMW32(AtomicOrdering Ordering,
176 const RISCVSubtarget *Subtarget) {
177 switch (Ordering) {
178 default:
179 llvm_unreachable("Unexpected AtomicOrdering");
180 case AtomicOrdering::Monotonic:
181 return RISCV::SC_W;
182 case AtomicOrdering::Acquire:
183 return RISCV::SC_W;
184 case AtomicOrdering::Release:
185 if (Subtarget->hasStdExtZtso())
186 return RISCV::SC_W;
187 return RISCV::SC_W_RL;
188 case AtomicOrdering::AcquireRelease:
189 if (Subtarget->hasStdExtZtso())
190 return RISCV::SC_W;
191 return RISCV::SC_W_RL;
192 case AtomicOrdering::SequentiallyConsistent:
193 return RISCV::SC_W_RL;
194 }
195}
196
197static unsigned getLRForRMW64(AtomicOrdering Ordering,
198 const RISCVSubtarget *Subtarget) {
199 switch (Ordering) {
200 default:
201 llvm_unreachable("Unexpected AtomicOrdering");
202 case AtomicOrdering::Monotonic:
203 return RISCV::LR_D;
204 case AtomicOrdering::Acquire:
205 if (Subtarget->hasStdExtZtso())
206 return RISCV::LR_D;
207 return RISCV::LR_D_AQ;
208 case AtomicOrdering::Release:
209 return RISCV::LR_D;
210 case AtomicOrdering::AcquireRelease:
211 if (Subtarget->hasStdExtZtso())
212 return RISCV::LR_D;
213 return RISCV::LR_D_AQ;
214 case AtomicOrdering::SequentiallyConsistent:
215 return RISCV::LR_D_AQ_RL;
216 }
217}
218
219static unsigned getSCForRMW64(AtomicOrdering Ordering,
220 const RISCVSubtarget *Subtarget) {
221 switch (Ordering) {
222 default:
223 llvm_unreachable("Unexpected AtomicOrdering");
224 case AtomicOrdering::Monotonic:
225 return RISCV::SC_D;
226 case AtomicOrdering::Acquire:
227 return RISCV::SC_D;
228 case AtomicOrdering::Release:
229 if (Subtarget->hasStdExtZtso())
230 return RISCV::SC_D;
231 return RISCV::SC_D_RL;
232 case AtomicOrdering::AcquireRelease:
233 if (Subtarget->hasStdExtZtso())
234 return RISCV::SC_D;
235 return RISCV::SC_D_RL;
236 case AtomicOrdering::SequentiallyConsistent:
237 return RISCV::SC_D_RL;
238 }
239}
240
241static unsigned getLRForRMW(AtomicOrdering Ordering, int Width,
242 const RISCVSubtarget *Subtarget) {
243 if (Width == 32)
244 return getLRForRMW32(Ordering, Subtarget);
245 if (Width == 64)
246 return getLRForRMW64(Ordering, Subtarget);
247 llvm_unreachable("Unexpected LR width\n");
248}
249
250static unsigned getSCForRMW(AtomicOrdering Ordering, int Width,
251 const RISCVSubtarget *Subtarget) {
252 if (Width == 32)
253 return getSCForRMW32(Ordering, Subtarget);
254 if (Width == 64)
255 return getSCForRMW64(Ordering, Subtarget);
256 llvm_unreachable("Unexpected SC width\n");
257}
258
259static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
260 DebugLoc DL, MachineBasicBlock *ThisMBB,
261 MachineBasicBlock *LoopMBB,
262 MachineBasicBlock *DoneMBB,
263 AtomicRMWInst::BinOp BinOp, int Width,
264 const RISCVSubtarget *STI) {
265 Register DestReg = MI.getOperand(0).getReg();
266 Register ScratchReg = MI.getOperand(1).getReg();
267 Register AddrReg = MI.getOperand(2).getReg();
268 Register IncrReg = MI.getOperand(3).getReg();
269 AtomicOrdering Ordering =
270 static_cast<AtomicOrdering>(MI.getOperand(4).getImm());
271
272 // .loop:
273 // lr.[w|d] dest, (addr)
274 // binop scratch, dest, val
275 // sc.[w|d] scratch, scratch, (addr)
276 // bnez scratch, loop
277 BuildMI(LoopMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)), DestReg)
278 .addReg(AddrReg);
279 switch (BinOp) {
280 default:
281 llvm_unreachable("Unexpected AtomicRMW BinOp");
283 BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
284 .addReg(DestReg)
285 .addReg(IncrReg);
286 BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
287 .addReg(ScratchReg)
288 .addImm(-1);
289 break;
290 }
291 BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)), ScratchReg)
292 .addReg(AddrReg)
293 .addReg(ScratchReg);
294 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
295 .addReg(ScratchReg)
296 .addReg(RISCV::X0)
297 .addMBB(LoopMBB);
298}
299
300static void insertMaskedMerge(const RISCVInstrInfo *TII, DebugLoc DL,
302 Register OldValReg, Register NewValReg,
303 Register MaskReg, Register ScratchReg) {
304 assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique");
305 assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique");
306 assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique");
307
308 // We select bits from newval and oldval using:
309 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
310 // r = oldval ^ ((oldval ^ newval) & masktargetdata);
311 BuildMI(MBB, DL, TII->get(RISCV::XOR), ScratchReg)
312 .addReg(OldValReg)
313 .addReg(NewValReg);
314 BuildMI(MBB, DL, TII->get(RISCV::AND), ScratchReg)
315 .addReg(ScratchReg)
316 .addReg(MaskReg);
317 BuildMI(MBB, DL, TII->get(RISCV::XOR), DestReg)
318 .addReg(OldValReg)
319 .addReg(ScratchReg);
320}
321
322static void doMaskedAtomicBinOpExpansion(const RISCVInstrInfo *TII,
324 MachineBasicBlock *ThisMBB,
325 MachineBasicBlock *LoopMBB,
326 MachineBasicBlock *DoneMBB,
327 AtomicRMWInst::BinOp BinOp, int Width,
328 const RISCVSubtarget *STI) {
329 assert(Width == 32 && "Should never need to expand masked 64-bit operations");
330 Register DestReg = MI.getOperand(0).getReg();
331 Register ScratchReg = MI.getOperand(1).getReg();
332 Register AddrReg = MI.getOperand(2).getReg();
333 Register IncrReg = MI.getOperand(3).getReg();
334 Register MaskReg = MI.getOperand(4).getReg();
335 AtomicOrdering Ordering =
336 static_cast<AtomicOrdering>(MI.getOperand(5).getImm());
337
338 // .loop:
339 // lr.w destreg, (alignedaddr)
340 // binop scratch, destreg, incr
341 // xor scratch, destreg, scratch
342 // and scratch, scratch, masktargetdata
343 // xor scratch, destreg, scratch
344 // sc.w scratch, scratch, (alignedaddr)
345 // bnez scratch, loop
346 BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering, STI)), DestReg)
347 .addReg(AddrReg);
348 switch (BinOp) {
349 default:
350 llvm_unreachable("Unexpected AtomicRMW BinOp");
352 BuildMI(LoopMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
353 .addReg(IncrReg)
354 .addImm(0);
355 break;
357 BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
358 .addReg(DestReg)
359 .addReg(IncrReg);
360 break;
362 BuildMI(LoopMBB, DL, TII->get(RISCV::SUB), ScratchReg)
363 .addReg(DestReg)
364 .addReg(IncrReg);
365 break;
367 BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
368 .addReg(DestReg)
369 .addReg(IncrReg);
370 BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
371 .addReg(ScratchReg)
372 .addImm(-1);
373 break;
374 }
375
376 insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg,
377 ScratchReg);
378
379 BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), ScratchReg)
380 .addReg(AddrReg)
381 .addReg(ScratchReg);
382 BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
383 .addReg(ScratchReg)
384 .addReg(RISCV::X0)
385 .addMBB(LoopMBB);
386}
387
388bool RISCVExpandAtomicPseudo::expandAtomicBinOp(
390 AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
391 MachineBasicBlock::iterator &NextMBBI) {
392 MachineInstr &MI = *MBBI;
393 DebugLoc DL = MI.getDebugLoc();
394
396 auto LoopMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
397 auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
398
399 // Insert new MBBs.
400 MF->insert(++MBB.getIterator(), LoopMBB);
401 MF->insert(++LoopMBB->getIterator(), DoneMBB);
402
403 // Set up successors and transfer remaining instructions to DoneMBB.
404 LoopMBB->addSuccessor(LoopMBB);
405 LoopMBB->addSuccessor(DoneMBB);
406 DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
407 DoneMBB->transferSuccessors(&MBB);
408 MBB.addSuccessor(LoopMBB);
409
410 if (!IsMasked)
411 doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width,
412 STI);
413 else
414 doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp,
415 Width, STI);
416
417 NextMBBI = MBB.end();
418 MI.eraseFromParent();
419
420 LivePhysRegs LiveRegs;
421 computeAndAddLiveIns(LiveRegs, *LoopMBB);
422 computeAndAddLiveIns(LiveRegs, *DoneMBB);
423
424 return true;
425}
426
427static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL,
429 Register ShamtReg) {
430 BuildMI(MBB, DL, TII->get(RISCV::SLL), ValReg)
431 .addReg(ValReg)
432 .addReg(ShamtReg);
433 BuildMI(MBB, DL, TII->get(RISCV::SRA), ValReg)
434 .addReg(ValReg)
435 .addReg(ShamtReg);
436}
437
438bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
440 AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
441 MachineBasicBlock::iterator &NextMBBI) {
442 assert(IsMasked == true &&
443 "Should only need to expand masked atomic max/min");
444 assert(Width == 32 && "Should never need to expand masked 64-bit operations");
445
446 MachineInstr &MI = *MBBI;
447 DebugLoc DL = MI.getDebugLoc();
449 auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
450 auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
451 auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
452 auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
453
454 // Insert new MBBs.
455 MF->insert(++MBB.getIterator(), LoopHeadMBB);
456 MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
457 MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
458 MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
459
460 // Set up successors and transfer remaining instructions to DoneMBB.
461 LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
462 LoopHeadMBB->addSuccessor(LoopTailMBB);
463 LoopIfBodyMBB->addSuccessor(LoopTailMBB);
464 LoopTailMBB->addSuccessor(LoopHeadMBB);
465 LoopTailMBB->addSuccessor(DoneMBB);
466 DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
467 DoneMBB->transferSuccessors(&MBB);
468 MBB.addSuccessor(LoopHeadMBB);
469
470 Register DestReg = MI.getOperand(0).getReg();
471 Register Scratch1Reg = MI.getOperand(1).getReg();
472 Register Scratch2Reg = MI.getOperand(2).getReg();
473 Register AddrReg = MI.getOperand(3).getReg();
474 Register IncrReg = MI.getOperand(4).getReg();
475 Register MaskReg = MI.getOperand(5).getReg();
476 bool IsSigned = BinOp == AtomicRMWInst::Min || BinOp == AtomicRMWInst::Max;
477 AtomicOrdering Ordering =
478 static_cast<AtomicOrdering>(MI.getOperand(IsSigned ? 7 : 6).getImm());
479
480 //
481 // .loophead:
482 // lr.w destreg, (alignedaddr)
483 // and scratch2, destreg, mask
484 // mv scratch1, destreg
485 // [sext scratch2 if signed min/max]
486 // ifnochangeneeded scratch2, incr, .looptail
487 BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering, STI)), DestReg)
488 .addReg(AddrReg);
489 BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), Scratch2Reg)
490 .addReg(DestReg)
491 .addReg(MaskReg);
492 BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), Scratch1Reg)
493 .addReg(DestReg)
494 .addImm(0);
495
496 switch (BinOp) {
497 default:
498 llvm_unreachable("Unexpected AtomicRMW BinOp");
499 case AtomicRMWInst::Max: {
500 insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
501 BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
502 .addReg(Scratch2Reg)
503 .addReg(IncrReg)
504 .addMBB(LoopTailMBB);
505 break;
506 }
507 case AtomicRMWInst::Min: {
508 insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
509 BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
510 .addReg(IncrReg)
511 .addReg(Scratch2Reg)
512 .addMBB(LoopTailMBB);
513 break;
514 }
516 BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
517 .addReg(Scratch2Reg)
518 .addReg(IncrReg)
519 .addMBB(LoopTailMBB);
520 break;
522 BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
523 .addReg(IncrReg)
524 .addReg(Scratch2Reg)
525 .addMBB(LoopTailMBB);
526 break;
527 }
528
529 // .loopifbody:
530 // xor scratch1, destreg, incr
531 // and scratch1, scratch1, mask
532 // xor scratch1, destreg, scratch1
533 insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg,
534 MaskReg, Scratch1Reg);
535
536 // .looptail:
537 // sc.w scratch1, scratch1, (addr)
538 // bnez scratch1, loophead
539 BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), Scratch1Reg)
540 .addReg(AddrReg)
541 .addReg(Scratch1Reg);
542 BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
543 .addReg(Scratch1Reg)
544 .addReg(RISCV::X0)
545 .addMBB(LoopHeadMBB);
546
547 NextMBBI = MBB.end();
548 MI.eraseFromParent();
549
550 LivePhysRegs LiveRegs;
551 computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
552 computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
553 computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
554 computeAndAddLiveIns(LiveRegs, *DoneMBB);
555
556 return true;
557}
558
559// If a BNE on the cmpxchg comparison result immediately follows the cmpxchg
560// operation, it can be folded into the cmpxchg expansion by
561// modifying the branch within 'LoopHead' (which performs the same
562// comparison). This is a valid transformation because after altering the
563// LoopHead's BNE destination, the BNE following the cmpxchg becomes
564// redundant and can be deleted. In the case of a masked cmpxchg, an
565// appropriate AND and BNE must be matched.
566//
567// On success, returns true and deletes the matching BNE or AND+BNE, sets the
568// LoopHeadBNETarget argument to the target that should be used within the
569// loop head, and removes that block as a successor to MBB.
570bool tryToFoldBNEOnCmpXchgResult(MachineBasicBlock &MBB,
572 Register DestReg, Register CmpValReg,
573 Register MaskReg,
574 MachineBasicBlock *&LoopHeadBNETarget) {
576 auto E = MBB.end();
577 if (MBBI == E)
578 return false;
580
581 // If we have a masked cmpxchg, match AND dst, DestReg, MaskReg.
582 if (MaskReg.isValid()) {
583 if (MBBI == E || MBBI->getOpcode() != RISCV::AND)
584 return false;
585 Register ANDOp1 = MBBI->getOperand(1).getReg();
586 Register ANDOp2 = MBBI->getOperand(2).getReg();
587 if (!(ANDOp1 == DestReg && ANDOp2 == MaskReg) &&
588 !(ANDOp1 == MaskReg && ANDOp2 == DestReg))
589 return false;
590 // We now expect the BNE to use the result of the AND as an operand.
591 DestReg = MBBI->getOperand(0).getReg();
592 ToErase.push_back(&*MBBI);
593 MBBI = skipDebugInstructionsForward(std::next(MBBI), E);
594 }
595
596 // Match BNE DestReg, CmpValReg.
597 if (MBBI == E || MBBI->getOpcode() != RISCV::BNE)
598 return false;
599 Register BNEOp0 = MBBI->getOperand(0).getReg();
600 Register BNEOp1 = MBBI->getOperand(1).getReg();
601 if (!(BNEOp0 == DestReg && BNEOp1 == CmpValReg) &&
602 !(BNEOp0 == CmpValReg && BNEOp1 == DestReg))
603 return false;
604
605 // Make sure the branch is the only user of the AND.
606 if (MaskReg.isValid()) {
607 if (BNEOp0 == DestReg && !MBBI->getOperand(0).isKill())
608 return false;
609 if (BNEOp1 == DestReg && !MBBI->getOperand(1).isKill())
610 return false;
611 }
612
613 ToErase.push_back(&*MBBI);
614 LoopHeadBNETarget = MBBI->getOperand(2).getMBB();
615 MBBI = skipDebugInstructionsForward(std::next(MBBI), E);
616 if (MBBI != E)
617 return false;
618
619 MBB.removeSuccessor(LoopHeadBNETarget);
620 for (auto *MI : ToErase)
621 MI->eraseFromParent();
622 return true;
623}
624
625bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
627 int Width, MachineBasicBlock::iterator &NextMBBI) {
628 MachineInstr &MI = *MBBI;
629 DebugLoc DL = MI.getDebugLoc();
631 auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
632 auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
633 auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
634
635 Register DestReg = MI.getOperand(0).getReg();
636 Register ScratchReg = MI.getOperand(1).getReg();
637 Register AddrReg = MI.getOperand(2).getReg();
638 Register CmpValReg = MI.getOperand(3).getReg();
639 Register NewValReg = MI.getOperand(4).getReg();
640 Register MaskReg = IsMasked ? MI.getOperand(5).getReg() : Register();
641
642 MachineBasicBlock *LoopHeadBNETarget = DoneMBB;
643 tryToFoldBNEOnCmpXchgResult(MBB, std::next(MBBI), DestReg, CmpValReg, MaskReg,
644 LoopHeadBNETarget);
645
646 // Insert new MBBs.
647 MF->insert(++MBB.getIterator(), LoopHeadMBB);
648 MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB);
649 MF->insert(++LoopTailMBB->getIterator(), DoneMBB);
650
651 // Set up successors and transfer remaining instructions to DoneMBB.
652 LoopHeadMBB->addSuccessor(LoopTailMBB);
653 LoopHeadMBB->addSuccessor(LoopHeadBNETarget);
654 LoopTailMBB->addSuccessor(DoneMBB);
655 LoopTailMBB->addSuccessor(LoopHeadMBB);
656 DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
657 DoneMBB->transferSuccessors(&MBB);
658 MBB.addSuccessor(LoopHeadMBB);
659
660 AtomicOrdering Ordering =
661 static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());
662
663 if (!IsMasked) {
664 // .loophead:
665 // lr.[w|d] dest, (addr)
666 // bne dest, cmpval, done
667 BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)),
668 DestReg)
669 .addReg(AddrReg);
670 BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
671 .addReg(DestReg)
672 .addReg(CmpValReg)
673 .addMBB(LoopHeadBNETarget);
674 // .looptail:
675 // sc.[w|d] scratch, newval, (addr)
676 // bnez scratch, loophead
677 BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
678 ScratchReg)
679 .addReg(AddrReg)
680 .addReg(NewValReg);
681 BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
682 .addReg(ScratchReg)
683 .addReg(RISCV::X0)
684 .addMBB(LoopHeadMBB);
685 } else {
686 // .loophead:
687 // lr.w dest, (addr)
688 // and scratch, dest, mask
689 // bne scratch, cmpval, done
690 Register MaskReg = MI.getOperand(5).getReg();
691 BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)),
692 DestReg)
693 .addReg(AddrReg);
694 BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), ScratchReg)
695 .addReg(DestReg)
696 .addReg(MaskReg);
697 BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
698 .addReg(ScratchReg)
699 .addReg(CmpValReg)
700 .addMBB(LoopHeadBNETarget);
701
702 // .looptail:
703 // xor scratch, dest, newval
704 // and scratch, scratch, mask
705 // xor scratch, dest, scratch
706 // sc.w scratch, scratch, (addr)
707 // bnez scratch, loophead
708 insertMaskedMerge(TII, DL, LoopTailMBB, ScratchReg, DestReg, NewValReg,
709 MaskReg, ScratchReg);
710 BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
711 ScratchReg)
712 .addReg(AddrReg)
713 .addReg(ScratchReg);
714 BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
715 .addReg(ScratchReg)
716 .addReg(RISCV::X0)
717 .addMBB(LoopHeadMBB);
718 }
719
720 NextMBBI = MBB.end();
721 MI.eraseFromParent();
722
723 LivePhysRegs LiveRegs;
724 computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
725 computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
726 computeAndAddLiveIns(LiveRegs, *DoneMBB);
727
728 return true;
729}
730
731} // end of anonymous namespace
732
733INITIALIZE_PASS(RISCVExpandAtomicPseudo, "riscv-expand-atomic-pseudo",
735
736namespace llvm {
737
739 return new RISCVExpandAtomicPseudo();
740}
741
742} // end of namespace llvm
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
MachineBasicBlock MachineBasicBlock::iterator MBBI
uint64_t Size
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
#define RISCV_EXPAND_ATOMIC_PSEUDO_NAME
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static unsigned getInstSizeInBytes(const MachineInstr &MI, const SystemZInstrInfo *TII)
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:716
@ Add
*p = old + v
Definition: Instructions.h:720
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:734
@ Sub
*p = old - v
Definition: Instructions.h:722
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:732
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:738
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:736
@ Nand
*p = ~(old & v)
Definition: Instructions.h:726
A debug info location.
Definition: DebugLoc.h:33
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
A set of physical registers with utility functions to track liveness when walking backward/forward th...
Definition: LivePhysRegs.h:52
void transferSuccessors(MachineBasicBlock *FromMBB)
Transfers all the successors from MBB to this machine basic block (i.e., copies all the successors Fr...
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *BB=nullptr, std::optional< UniqueBBID > BBID=std::nullopt)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
void insert(iterator MBBI, MachineBasicBlock *MBB)
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
Definition: MachineInstr.h:69
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
self_iterator getIterator()
Definition: ilist_node.h:132
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
FunctionPass * createRISCVExpandAtomicPseudoPass()
IterT skipDebugInstructionsForward(IterT It, IterT End, bool SkipPseudoOp=true)
Increment It until it points to a non-debug instruction or to End and return the resulting iterator.
void initializeRISCVExpandAtomicPseudoPass(PassRegistry &)
AtomicOrdering
Atomic ordering for LLVM's memory model.
void computeAndAddLiveIns(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB)
Convenience function combining computeLiveIns() and addLiveIns().