//===-- RISCVExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands atomic pseudo instructions into
// target instructions. This pass should be run at the last possible moment,
// avoiding the possibility for other passes to break the requirements for
// forward progress in the LR/SC block.
//
//===----------------------------------------------------------------------===//
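
// For illustration: a pseudo such as PseudoAtomicLoadAdd32 is rewritten by
// this pass into an explicit LR/SC retry loop along the lines of
//
//   .loop:
//     lr.w   dest, (addr)
//     add    scratch, dest, incr
//     sc.w   scratch, scratch, (addr)
//     bnez   scratch, .loop
//
// Keeping the pseudo opaque until this late point stops earlier passes from
// moving code into the LR/SC sequence and breaking its forward-progress
// guarantee.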

#include "RISCV.h"
#include "RISCVInstrInfo.h"
#include "RISCVTargetMachine.h"

#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

using namespace llvm;

#define RISCV_EXPAND_ATOMIC_PSEUDO_NAME                                        \
  "RISC-V atomic pseudo instruction expansion pass"

namespace {

class RISCVExpandAtomicPseudo : public MachineFunctionPass {
public:
  const RISCVSubtarget *STI;
  const RISCVInstrInfo *TII;
  static char ID;

  RISCVExpandAtomicPseudo() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return RISCV_EXPAND_ATOMIC_PSEUDO_NAME;
  }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicBinOp(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp,
                         bool IsMasked, int Width,
                         MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicMinMaxOp(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            AtomicRMWInst::BinOp, bool IsMasked, int Width,
                            MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, bool IsMasked,
                           int Width, MachineBasicBlock::iterator &NextMBBI);
#ifndef NDEBUG
  unsigned getInstSizeInBytes(const MachineFunction &MF) const {
    unsigned Size = 0;
    for (auto &MBB : MF)
      for (auto &MI : MBB)
        Size += TII->getInstSizeInBytes(MI);
    return Size;
  }
#endif
};

char RISCVExpandAtomicPseudo::ID = 0;

bool RISCVExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
  STI = &MF.getSubtarget<RISCVSubtarget>();
  TII = STI->getInstrInfo();

#ifndef NDEBUG
  const unsigned OldSize = getInstSizeInBytes(MF);
#endif

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);

#ifndef NDEBUG
  const unsigned NewSize = getInstSizeInBytes(MF);
  assert(OldSize >= NewSize);
#endif
  return Modified;
}

bool RISCVExpandAtomicPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       MachineBasicBlock::iterator &NextMBBI) {
  // RISCVInstrInfo::getInstSizeInBytes expects that the total size of the
  // expanded instructions for each pseudo is correct in the Size field of the
  // tablegen definition for the pseudo.
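  // For example, with the expansion emitted below a plain PseudoCmpXchg32
  // becomes a four-instruction loop (lr.w, bne, sc.w, bne), so its Size field
  // must cover at least those four instructions for the OldSize >= NewSize
  // assertion in runOnMachineFunction to hold.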
  switch (MBBI->getOpcode()) {
  case RISCV::PseudoAtomicSwap32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, false, 32,
                             NextMBBI);
  case RISCV::PseudoAtomicSwap64:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, false, 64,
                             NextMBBI);
  case RISCV::PseudoAtomicLoadAdd32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, false, 32,
                             NextMBBI);
  case RISCV::PseudoAtomicLoadAdd64:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, false, 64,
                             NextMBBI);
  case RISCV::PseudoAtomicLoadSub32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, false, 32,
                             NextMBBI);
  case RISCV::PseudoAtomicLoadSub64:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, false, 64,
                             NextMBBI);
  case RISCV::PseudoAtomicLoadAnd32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::And, false, 32,
                             NextMBBI);
  case RISCV::PseudoAtomicLoadAnd64:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::And, false, 64,
                             NextMBBI);
  case RISCV::PseudoAtomicLoadOr32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Or, false, 32, NextMBBI);
  case RISCV::PseudoAtomicLoadOr64:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Or, false, 64, NextMBBI);
  case RISCV::PseudoAtomicLoadXor32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 32,
                             NextMBBI);
  case RISCV::PseudoAtomicLoadXor64:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 64,
                             NextMBBI);
  case RISCV::PseudoAtomicLoadNand32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
                             NextMBBI);
  case RISCV::PseudoAtomicLoadNand64:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64,
                             NextMBBI);
  case RISCV::PseudoAtomicLoadMin32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, false, 32,
                                NextMBBI);
  case RISCV::PseudoAtomicLoadMin64:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, false, 64,
                                NextMBBI);
  case RISCV::PseudoAtomicLoadMax32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, false, 32,
                                NextMBBI);
  case RISCV::PseudoAtomicLoadMax64:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, false, 64,
                                NextMBBI);
  case RISCV::PseudoAtomicLoadUMin32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, false, 32,
                                NextMBBI);
  case RISCV::PseudoAtomicLoadUMin64:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, false, 64,
                                NextMBBI);
  case RISCV::PseudoAtomicLoadUMax32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, false, 32,
                                NextMBBI);
  case RISCV::PseudoAtomicLoadUMax64:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, false, 64,
                                NextMBBI);
  case RISCV::PseudoMaskedAtomicSwap32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
                             NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadAdd32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, true, 32, NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadSub32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, true, 32, NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadNand32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, true, 32,
                             NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadMax32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32,
                                NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadMin32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, true, 32,
                                NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadUMax32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
                                NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadUMin32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32,
                                NextMBBI);
  case RISCV::PseudoCmpXchg32:
    return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI);
  case RISCV::PseudoCmpXchg64:
    return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
  case RISCV::PseudoMaskedCmpXchg32:
    return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
  }

  return false;
}

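// The helpers below pick the LR/SC opcode variant that carries the .aq/.rl
// bits needed for a given AtomicOrdering (sequentially consistent operations
// use lr.{w,d}.aqrl paired with sc.{w,d}.rl). When the Ztso extension is
// present the hardware already provides total store ordering, so the weaker
// orderings can safely fall back to the un-annotated LR/SC encodings.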
static unsigned getLRForRMW32(AtomicOrdering Ordering,
                              const RISCVSubtarget *Subtarget) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::LR_W;
  case AtomicOrdering::Acquire:
    if (Subtarget->hasStdExtZtso())
      return RISCV::LR_W;
    return RISCV::LR_W_AQ;
  case AtomicOrdering::Release:
    return RISCV::LR_W;
  case AtomicOrdering::AcquireRelease:
    if (Subtarget->hasStdExtZtso())
      return RISCV::LR_W;
    return RISCV::LR_W_AQ;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::LR_W_AQRL;
  }
}

static unsigned getSCForRMW32(AtomicOrdering Ordering,
                              const RISCVSubtarget *Subtarget) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::SC_W;
  case AtomicOrdering::Acquire:
    return RISCV::SC_W;
  case AtomicOrdering::Release:
    if (Subtarget->hasStdExtZtso())
      return RISCV::SC_W;
    return RISCV::SC_W_RL;
  case AtomicOrdering::AcquireRelease:
    if (Subtarget->hasStdExtZtso())
      return RISCV::SC_W;
    return RISCV::SC_W_RL;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::SC_W_RL;
  }
}

static unsigned getLRForRMW64(AtomicOrdering Ordering,
                              const RISCVSubtarget *Subtarget) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::LR_D;
  case AtomicOrdering::Acquire:
    if (Subtarget->hasStdExtZtso())
      return RISCV::LR_D;
    return RISCV::LR_D_AQ;
  case AtomicOrdering::Release:
    return RISCV::LR_D;
  case AtomicOrdering::AcquireRelease:
    if (Subtarget->hasStdExtZtso())
      return RISCV::LR_D;
    return RISCV::LR_D_AQ;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::LR_D_AQRL;
  }
}

static unsigned getSCForRMW64(AtomicOrdering Ordering,
                              const RISCVSubtarget *Subtarget) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::SC_D;
  case AtomicOrdering::Acquire:
    return RISCV::SC_D;
  case AtomicOrdering::Release:
    if (Subtarget->hasStdExtZtso())
      return RISCV::SC_D;
    return RISCV::SC_D_RL;
  case AtomicOrdering::AcquireRelease:
    if (Subtarget->hasStdExtZtso())
      return RISCV::SC_D;
    return RISCV::SC_D_RL;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::SC_D_RL;
  }
}

static unsigned getLRForRMW(AtomicOrdering Ordering, int Width,
                            const RISCVSubtarget *Subtarget) {
  if (Width == 32)
    return getLRForRMW32(Ordering, Subtarget);
  if (Width == 64)
    return getLRForRMW64(Ordering, Subtarget);
  llvm_unreachable("Unexpected LR width\n");
}

static unsigned getSCForRMW(AtomicOrdering Ordering, int Width,
                            const RISCVSubtarget *Subtarget) {
  if (Width == 32)
    return getSCForRMW32(Ordering, Subtarget);
  if (Width == 64)
    return getSCForRMW64(Ordering, Subtarget);
  llvm_unreachable("Unexpected SC width\n");
}

static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
                                   DebugLoc DL, MachineBasicBlock *ThisMBB,
                                   MachineBasicBlock *LoopMBB,
                                   MachineBasicBlock *DoneMBB,
                                   AtomicRMWInst::BinOp BinOp, int Width,
                                   const RISCVSubtarget *STI) {
  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register IncrReg = MI.getOperand(3).getReg();
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(4).getImm());

  // .loop:
  //   lr.[w|d] dest, (addr)
  //   binop scratch, dest, val
  //   sc.[w|d] scratch, scratch, (addr)
  //   bnez scratch, loop
  BuildMI(LoopMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)), DestReg)
      .addReg(AddrReg);
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Xchg:
    BuildMI(LoopMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
        .addReg(IncrReg)
        .addImm(0);
    break;
  case AtomicRMWInst::Add:
    BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Sub:
    BuildMI(LoopMBB, DL, TII->get(RISCV::SUB), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::And:
    BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Or:
    BuildMI(LoopMBB, DL, TII->get(RISCV::OR), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Xor:
    BuildMI(LoopMBB, DL, TII->get(RISCV::XOR), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Nand:
    BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
        .addReg(ScratchReg)
        .addImm(-1);
    break;
  }
  BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)), ScratchReg)
      .addReg(ScratchReg)
      .addReg(AddrReg);
  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(ScratchReg)
      .addReg(RISCV::X0)
      .addMBB(LoopMBB);
}

static void insertMaskedMerge(const RISCVInstrInfo *TII, DebugLoc DL,
                              MachineBasicBlock *MBB, Register DestReg,
                              Register OldValReg, Register NewValReg,
                              Register MaskReg, Register ScratchReg) {
  assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique");
  assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique");
  assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique");

  // We select bits from newval and oldval using:
  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
  // r = oldval ^ ((oldval ^ newval) & masktargetdata);
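  // Worked example (illustrative values): with oldval = 0xAABBCCDD,
  // newval = 0x000011FF and mask = 0x0000FF00:
  //   oldval ^ newval          = 0xAABBDD22
  //   ... & mask               = 0x0000DD00
  //   oldval ^ ...             = 0xAABB11DD
  // Only the byte selected by the mask is taken from newval; every other byte
  // keeps its value from oldval.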
  BuildMI(MBB, DL, TII->get(RISCV::XOR), ScratchReg)
      .addReg(OldValReg)
      .addReg(NewValReg);
  BuildMI(MBB, DL, TII->get(RISCV::AND), ScratchReg)
      .addReg(ScratchReg)
      .addReg(MaskReg);
  BuildMI(MBB, DL, TII->get(RISCV::XOR), DestReg)
      .addReg(OldValReg)
      .addReg(ScratchReg);
}

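// The masked expansions below are used for sub-word (8- and 16-bit) atomics:
// the operation is performed with lr.w/sc.w on the aligned 32-bit word that
// contains the value, and the mask operand selects the bits actually being
// updated. The aligned address, shifted increment and mask are prepared
// before this pass runs, as operands of the masked pseudo.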
static void doMaskedAtomicBinOpExpansion(const RISCVInstrInfo *TII,
                                         MachineInstr &MI, DebugLoc DL,
                                         MachineBasicBlock *ThisMBB,
                                         MachineBasicBlock *LoopMBB,
                                         MachineBasicBlock *DoneMBB,
                                         AtomicRMWInst::BinOp BinOp, int Width,
                                         const RISCVSubtarget *STI) {
  assert(Width == 32 && "Should never need to expand masked 64-bit operations");
  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register IncrReg = MI.getOperand(3).getReg();
  Register MaskReg = MI.getOperand(4).getReg();
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(5).getImm());

  // .loop:
  //   lr.w destreg, (alignedaddr)
  //   binop scratch, destreg, incr
  //   xor scratch, destreg, scratch
  //   and scratch, scratch, masktargetdata
  //   xor scratch, destreg, scratch
  //   sc.w scratch, scratch, (alignedaddr)
  //   bnez scratch, loop
  BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering, STI)), DestReg)
      .addReg(AddrReg);
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Xchg:
    BuildMI(LoopMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
        .addReg(IncrReg)
        .addImm(0);
    break;
  case AtomicRMWInst::Add:
    BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Sub:
    BuildMI(LoopMBB, DL, TII->get(RISCV::SUB), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Nand:
    BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
        .addReg(ScratchReg)
        .addImm(-1);
    break;
  }

  insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg,
                    ScratchReg);

  BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), ScratchReg)
      .addReg(ScratchReg)
      .addReg(AddrReg);
  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(ScratchReg)
      .addReg(RISCV::X0)
      .addMBB(LoopMBB);
}

bool RISCVExpandAtomicPseudo::expandAtomicBinOp(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();

  MachineFunction *MF = MBB.getParent();
  auto LoopMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopMBB);
  MF->insert(++LoopMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(DoneMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopMBB);

  if (!IsMasked)
    doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width,
                           STI);
  else
    doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp,
                                 Width, STI);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL,
                       MachineBasicBlock *MBB, Register ValReg,
                       Register ShamtReg) {
  BuildMI(MBB, DL, TII->get(RISCV::SLL), ValReg)
      .addReg(ValReg)
      .addReg(ShamtReg);
  BuildMI(MBB, DL, TII->get(RISCV::SRA), ValReg)
      .addReg(ValReg)
      .addReg(ShamtReg);
}

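// The base ISA has no conditional-move or min/max instruction, so the min/max
// expansion below needs a branchy diamond: .loophead compares the loaded
// value against the increment and, if no update is needed, skips .loopifbody
// (which is just "mv scratch, incr") and goes straight to the SC in
// .looptail. The "mv" instructions are materialised as ADDI rd, rs, 0.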
static void doAtomicMinMaxOpExpansion(
    const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
    MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopHeadMBB,
    MachineBasicBlock *LoopIfBodyMBB, MachineBasicBlock *LoopTailMBB,
    MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width,
    const RISCVSubtarget *STI) {
  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register IncrReg = MI.getOperand(3).getReg();
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(4).getImm());

  // .loophead:
  //   lr.[w|d] dest, (addr)
  //   mv scratch, dest
  //   ifnochangeneeded scratch, incr, .looptail
  BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)), DestReg)
      .addReg(AddrReg);
  BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
      .addReg(DestReg)
      .addImm(0);
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Max: {
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
        .addReg(ScratchReg)
        .addReg(IncrReg)
        .addMBB(LoopTailMBB);
    break;
  }
  case AtomicRMWInst::Min: {
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
        .addReg(IncrReg)
        .addReg(ScratchReg)
        .addMBB(LoopTailMBB);
    break;
  }
  case AtomicRMWInst::UMax:
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
        .addReg(ScratchReg)
        .addReg(IncrReg)
        .addMBB(LoopTailMBB);
    break;
  case AtomicRMWInst::UMin:
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
        .addReg(IncrReg)
        .addReg(ScratchReg)
        .addMBB(LoopTailMBB);
    break;
  }

  // .loopifbody:
  //   mv scratch, incr
  BuildMI(LoopIfBodyMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
      .addReg(IncrReg)
      .addImm(0);

  // .looptail:
  //   sc.[w|d] scratch, scratch, (addr)
  //   bnez scratch, loop
  BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
          ScratchReg)
      .addReg(ScratchReg)
      .addReg(AddrReg);
  BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
      .addReg(ScratchReg)
      .addReg(RISCV::X0)
      .addMBB(LoopHeadMBB);
}

static void doMaskedAtomicMinMaxOpExpansion(
    const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
    MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopHeadMBB,
    MachineBasicBlock *LoopIfBodyMBB, MachineBasicBlock *LoopTailMBB,
    MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width,
    const RISCVSubtarget *STI) {
  assert(Width == 32 && "Should never need to expand masked 64-bit operations");
  Register DestReg = MI.getOperand(0).getReg();
  Register Scratch1Reg = MI.getOperand(1).getReg();
  Register Scratch2Reg = MI.getOperand(2).getReg();
  Register AddrReg = MI.getOperand(3).getReg();
  Register IncrReg = MI.getOperand(4).getReg();
  Register MaskReg = MI.getOperand(5).getReg();
  bool IsSigned = BinOp == AtomicRMWInst::Min || BinOp == AtomicRMWInst::Max;
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(IsSigned ? 7 : 6).getImm());

  //
  // .loophead:
  //   lr.w destreg, (alignedaddr)
  //   and scratch2, destreg, mask
  //   mv scratch1, destreg
  //   [sext scratch2 if signed min/max]
  //   ifnochangeneeded scratch2, incr, .looptail
  BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering, STI)), DestReg)
      .addReg(AddrReg);
  BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), Scratch2Reg)
      .addReg(DestReg)
      .addReg(MaskReg);
  BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), Scratch1Reg)
      .addReg(DestReg)
      .addImm(0);

  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Max: {
    insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
        .addReg(Scratch2Reg)
        .addReg(IncrReg)
        .addMBB(LoopTailMBB);
    break;
  }
  case AtomicRMWInst::Min: {
    insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
        .addReg(IncrReg)
        .addReg(Scratch2Reg)
        .addMBB(LoopTailMBB);
    break;
  }
  case AtomicRMWInst::UMax:
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
        .addReg(Scratch2Reg)
        .addReg(IncrReg)
        .addMBB(LoopTailMBB);
    break;
  case AtomicRMWInst::UMin:
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
        .addReg(IncrReg)
        .addReg(Scratch2Reg)
        .addMBB(LoopTailMBB);
    break;
  }

  // .loopifbody:
  //   xor scratch1, destreg, incr
  //   and scratch1, scratch1, mask
  //   xor scratch1, destreg, scratch1
  insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg,
                    MaskReg, Scratch1Reg);

  // .looptail:
  //   sc.w scratch1, scratch1, (addr)
  //   bnez scratch1, loop
  BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), Scratch1Reg)
      .addReg(Scratch1Reg)
      .addReg(AddrReg);
  BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
      .addReg(Scratch1Reg)
      .addReg(RISCV::X0)
      .addMBB(LoopHeadMBB);
}

bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
    MachineBasicBlock::iterator &NextMBBI) {

  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = MBB.getParent();
  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopHeadMBB);
  MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
  MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
  LoopHeadMBB->addSuccessor(LoopTailMBB);
  LoopIfBodyMBB->addSuccessor(LoopTailMBB);
  LoopTailMBB->addSuccessor(LoopHeadMBB);
  LoopTailMBB->addSuccessor(DoneMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopHeadMBB);

  if (!IsMasked)
    doAtomicMinMaxOpExpansion(TII, MI, DL, &MBB, LoopHeadMBB, LoopIfBodyMBB,
                              LoopTailMBB, DoneMBB, BinOp, Width, STI);
  else
    doMaskedAtomicMinMaxOpExpansion(TII, MI, DL, &MBB, LoopHeadMBB,
                                    LoopIfBodyMBB, LoopTailMBB, DoneMBB, BinOp,
                                    Width, STI);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
  computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

// If a BNE on the cmpxchg comparison result immediately follows the cmpxchg
// operation, it can be folded into the cmpxchg expansion by
// modifying the branch within 'LoopHead' (which performs the same
// comparison). This is a valid transformation because after altering the
// LoopHead's BNE destination, the BNE following the cmpxchg becomes
// redundant and can be deleted. In the case of a masked cmpxchg, an
// appropriate AND and BNE must be matched.
//
// On success, returns true and deletes the matching BNE or AND+BNE, sets the
// LoopHeadBNETarget argument to the target that should be used within the
// loop head, and removes that block as a successor to MBB.
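//
// For illustration (register names and label are hypothetical), a sequence
// such as
//
//   dest = PseudoCmpXchg32 scratch, (addr), cmpval, newval
//   bne  dest, cmpval, .Lfail
//
// can be rewritten so that the BNE emitted in the loop head branches straight
// to .Lfail, leaving the trailing BNE dead.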
bool tryToFoldBNEOnCmpXchgResult(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 Register DestReg, Register CmpValReg,
                                 Register MaskReg,
                                 MachineBasicBlock *&LoopHeadBNETarget) {
  SmallVector<MachineInstr *> ToErase;
  auto E = MBB.end();
  if (MBBI == E)
    return false;
  MBBI = skipDebugInstructionsForward(MBBI, E);

  // If we have a masked cmpxchg, match AND dst, DestReg, MaskReg.
  if (MaskReg.isValid()) {
    if (MBBI == E || MBBI->getOpcode() != RISCV::AND)
      return false;
    Register ANDOp1 = MBBI->getOperand(1).getReg();
    Register ANDOp2 = MBBI->getOperand(2).getReg();
    if (!(ANDOp1 == DestReg && ANDOp2 == MaskReg) &&
        !(ANDOp1 == MaskReg && ANDOp2 == DestReg))
      return false;
    // We now expect the BNE to use the result of the AND as an operand.
    DestReg = MBBI->getOperand(0).getReg();
    ToErase.push_back(&*MBBI);
    MBBI = skipDebugInstructionsForward(std::next(MBBI), E);
  }

  // Match BNE DestReg, CmpValReg.
  if (MBBI == E || MBBI->getOpcode() != RISCV::BNE)
    return false;
  Register BNEOp0 = MBBI->getOperand(0).getReg();
  Register BNEOp1 = MBBI->getOperand(1).getReg();
  if (!(BNEOp0 == DestReg && BNEOp1 == CmpValReg) &&
      !(BNEOp0 == CmpValReg && BNEOp1 == DestReg))
    return false;

  // Make sure the branch is the only user of the AND.
  if (MaskReg.isValid()) {
    if (BNEOp0 == DestReg && !MBBI->getOperand(0).isKill())
      return false;
    if (BNEOp1 == DestReg && !MBBI->getOperand(1).isKill())
      return false;
  }

  ToErase.push_back(&*MBBI);
  LoopHeadBNETarget = MBBI->getOperand(2).getMBB();
  MBBI = skipDebugInstructionsForward(std::next(MBBI), E);
  if (MBBI != E)
    return false;

  MBB.removeSuccessor(LoopHeadBNETarget);
  for (auto *MI : ToErase)
    MI->eraseFromParent();
  return true;
}

bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked,
    int Width, MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = MBB.getParent();
  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register CmpValReg = MI.getOperand(3).getReg();
  Register NewValReg = MI.getOperand(4).getReg();
  Register MaskReg = IsMasked ? MI.getOperand(5).getReg() : Register();

  MachineBasicBlock *LoopHeadBNETarget = DoneMBB;
  tryToFoldBNEOnCmpXchgResult(MBB, std::next(MBBI), DestReg, CmpValReg, MaskReg,
                              LoopHeadBNETarget);

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopHeadMBB);
  MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB);
  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopHeadMBB->addSuccessor(LoopTailMBB);
  LoopHeadMBB->addSuccessor(LoopHeadBNETarget);
  LoopTailMBB->addSuccessor(DoneMBB);
  LoopTailMBB->addSuccessor(LoopHeadMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopHeadMBB);

  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());

  if (!IsMasked) {
    // .loophead:
    //   lr.[w|d] dest, (addr)
    //   bne dest, cmpval, done
    BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)),
            DestReg)
        .addReg(AddrReg);
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
        .addReg(DestReg)
        .addReg(CmpValReg)
        .addMBB(LoopHeadBNETarget);
    // .looptail:
    //   sc.[w|d] scratch, newval, (addr)
    //   bnez scratch, loophead
    BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
            ScratchReg)
        .addReg(NewValReg)
        .addReg(AddrReg);
    BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
        .addReg(ScratchReg)
        .addReg(RISCV::X0)
        .addMBB(LoopHeadMBB);
  } else {
    // .loophead:
    //   lr.w dest, (addr)
    //   and scratch, dest, mask
    //   bne scratch, cmpval, done
    Register MaskReg = MI.getOperand(5).getReg();
    BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)),
            DestReg)
        .addReg(AddrReg);
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(MaskReg);
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
        .addReg(ScratchReg)
        .addReg(CmpValReg)
        .addMBB(LoopHeadBNETarget);

    // .looptail:
    //   xor scratch, dest, newval
    //   and scratch, scratch, mask
    //   xor scratch, dest, scratch
    //   sc.w scratch, scratch, (addr)
    //   bnez scratch, loophead
    insertMaskedMerge(TII, DL, LoopTailMBB, ScratchReg, DestReg, NewValReg,
                      MaskReg, ScratchReg);
    BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
            ScratchReg)
        .addReg(ScratchReg)
        .addReg(AddrReg);
    BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
        .addReg(ScratchReg)
        .addReg(RISCV::X0)
        .addMBB(LoopHeadMBB);
  }

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

} // end of anonymous namespace

INITIALIZE_PASS(RISCVExpandAtomicPseudo, "riscv-expand-atomic-pseudo",
                RISCV_EXPAND_ATOMIC_PSEUDO_NAME, false, false)

FunctionPass *llvm::createRISCVExpandAtomicPseudoPass() {
  return new RISCVExpandAtomicPseudo();
}
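
// Usage note: the pass is created with createRISCVExpandAtomicPseudoPass()
// and is meant to run after register allocation (it tracks physical-register
// liveness via LivePhysRegs), as late as possible in the RISC-V codegen
// pipeline so that nothing is scheduled into the emitted LR/SC loops; where
// exactly it is inserted is decided by RISCVTargetMachine's pass setup.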