RISCVExpandAtomicPseudoInsts.cpp
//===-- RISCVExpandAtomicPseudoInsts.cpp - Expand atomic pseudo instrs. ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands atomic pseudo instructions into
// target instructions. This pass should be run at the last possible moment,
// avoiding the possibility for other passes to break the requirements for
// forward progress in the LR/SC block.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVInstrInfo.h"
#include "RISCVTargetMachine.h"

#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

using namespace llvm;

#define RISCV_EXPAND_ATOMIC_PSEUDO_NAME \
  "RISC-V atomic pseudo instruction expansion pass"

namespace {

class RISCVExpandAtomicPseudo : public MachineFunctionPass {
public:
  const RISCVSubtarget *STI;
  const RISCVInstrInfo *TII;
  static char ID;

  RISCVExpandAtomicPseudo() : MachineFunctionPass(ID) {}

  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return RISCV_EXPAND_ATOMIC_PSEUDO_NAME;
  }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicBinOp(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, AtomicRMWInst::BinOp,
                         bool IsMasked, int Width,
                         MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicMinMaxOp(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI,
                            AtomicRMWInst::BinOp, bool IsMasked, int Width,
                            MachineBasicBlock::iterator &NextMBBI);
  bool expandAtomicCmpXchg(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, bool IsMasked,
                           int Width, MachineBasicBlock::iterator &NextMBBI);
#ifndef NDEBUG
  unsigned getInstSizeInBytes(const MachineFunction &MF) const {
    unsigned Size = 0;
    for (auto &MBB : MF)
      for (auto &MI : MBB)
        Size += TII->getInstSizeInBytes(MI);
    return Size;
  }
#endif
};

char RISCVExpandAtomicPseudo::ID = 0;

bool RISCVExpandAtomicPseudo::runOnMachineFunction(MachineFunction &MF) {
  STI = &MF.getSubtarget<RISCVSubtarget>();
  TII = STI->getInstrInfo();

#ifndef NDEBUG
  const unsigned OldSize = getInstSizeInBytes(MF);
#endif

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);

#ifndef NDEBUG
  const unsigned NewSize = getInstSizeInBytes(MF);
  assert(OldSize >= NewSize);
#endif
  return Modified;
}

bool RISCVExpandAtomicPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool RISCVExpandAtomicPseudo::expandMI(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       MachineBasicBlock::iterator &NextMBBI) {
  // RISCVInstrInfo::getInstSizeInBytes expects that the total size of the
  // expanded instructions for each pseudo is correct in the Size field of the
  // tablegen definition for the pseudo.
  switch (MBBI->getOpcode()) {
  case RISCV::PseudoAtomicSwap32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, false, 32,
                             NextMBBI);
  case RISCV::PseudoAtomicSwap64:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, false, 64,
                             NextMBBI);
  case RISCV::PseudoAtomicLoadAdd32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, false, 32,
                             NextMBBI);
  case RISCV::PseudoAtomicLoadAdd64:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, false, 64,
                             NextMBBI);
  case RISCV::PseudoAtomicLoadSub32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, false, 32,
                             NextMBBI);
  case RISCV::PseudoAtomicLoadSub64:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, false, 64,
                             NextMBBI);
  case RISCV::PseudoAtomicLoadAnd32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::And, false, 32,
                             NextMBBI);
  case RISCV::PseudoAtomicLoadAnd64:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::And, false, 64,
                             NextMBBI);
  case RISCV::PseudoAtomicLoadOr32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Or, false, 32, NextMBBI);
  case RISCV::PseudoAtomicLoadOr64:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Or, false, 64, NextMBBI);
  case RISCV::PseudoAtomicLoadXor32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 32,
                             NextMBBI);
  case RISCV::PseudoAtomicLoadXor64:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xor, false, 64,
                             NextMBBI);
  case RISCV::PseudoAtomicLoadNand32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 32,
                             NextMBBI);
  case RISCV::PseudoAtomicLoadNand64:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, false, 64,
                             NextMBBI);
  case RISCV::PseudoAtomicLoadMin32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, false, 32,
                                NextMBBI);
  case RISCV::PseudoAtomicLoadMin64:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, false, 64,
                                NextMBBI);
  case RISCV::PseudoAtomicLoadMax32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, false, 32,
                                NextMBBI);
  case RISCV::PseudoAtomicLoadMax64:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, false, 64,
                                NextMBBI);
  case RISCV::PseudoAtomicLoadUMin32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, false, 32,
                                NextMBBI);
  case RISCV::PseudoAtomicLoadUMin64:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, false, 64,
                                NextMBBI);
  case RISCV::PseudoAtomicLoadUMax32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, false, 32,
                                NextMBBI);
  case RISCV::PseudoAtomicLoadUMax64:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, false, 64,
                                NextMBBI);
  case RISCV::PseudoMaskedAtomicSwap32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Xchg, true, 32,
                             NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadAdd32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Add, true, 32, NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadSub32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Sub, true, 32, NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadNand32:
    return expandAtomicBinOp(MBB, MBBI, AtomicRMWInst::Nand, true, 32,
                             NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadMax32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Max, true, 32,
                                NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadMin32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::Min, true, 32,
                                NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadUMax32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMax, true, 32,
                                NextMBBI);
  case RISCV::PseudoMaskedAtomicLoadUMin32:
    return expandAtomicMinMaxOp(MBB, MBBI, AtomicRMWInst::UMin, true, 32,
                                NextMBBI);
  case RISCV::PseudoCmpXchg32:
    return expandAtomicCmpXchg(MBB, MBBI, false, 32, NextMBBI);
  case RISCV::PseudoCmpXchg64:
    return expandAtomicCmpXchg(MBB, MBBI, false, 64, NextMBBI);
  case RISCV::PseudoMaskedCmpXchg32:
    return expandAtomicCmpXchg(MBB, MBBI, true, 32, NextMBBI);
  }

  return false;
}

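// Explanatory note: the helpers below pick the LR/SC opcode variant for a
// given atomic ordering. Acquire orderings use the .aq-annotated
// load-reserved, release orderings use the .rl-annotated store-conditional,
// and sequentially consistent operations use the aqrl/rl pair. When the
// subtarget implements Ztso (total store ordering), the plain LR/SC forms are
// already strong enough for acquire/release semantics, so those annotations
// are dropped; the sequentially consistent case keeps its annotations.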
static unsigned getLRForRMW32(AtomicOrdering Ordering,
                              const RISCVSubtarget *Subtarget) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::LR_W;
  case AtomicOrdering::Acquire:
    if (Subtarget->hasStdExtZtso())
      return RISCV::LR_W;
    return RISCV::LR_W_AQ;
  case AtomicOrdering::Release:
    return RISCV::LR_W;
  case AtomicOrdering::AcquireRelease:
    if (Subtarget->hasStdExtZtso())
      return RISCV::LR_W;
    return RISCV::LR_W_AQ;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::LR_W_AQRL;
  }
}

static unsigned getSCForRMW32(AtomicOrdering Ordering,
                              const RISCVSubtarget *Subtarget) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::SC_W;
  case AtomicOrdering::Acquire:
    return RISCV::SC_W;
  case AtomicOrdering::Release:
    if (Subtarget->hasStdExtZtso())
      return RISCV::SC_W;
    return RISCV::SC_W_RL;
  case AtomicOrdering::AcquireRelease:
    if (Subtarget->hasStdExtZtso())
      return RISCV::SC_W;
    return RISCV::SC_W_RL;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::SC_W_RL;
  }
}

static unsigned getLRForRMW64(AtomicOrdering Ordering,
                              const RISCVSubtarget *Subtarget) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::LR_D;
  case AtomicOrdering::Acquire:
    if (Subtarget->hasStdExtZtso())
      return RISCV::LR_D;
    return RISCV::LR_D_AQ;
  case AtomicOrdering::Release:
    return RISCV::LR_D;
  case AtomicOrdering::AcquireRelease:
    if (Subtarget->hasStdExtZtso())
      return RISCV::LR_D;
    return RISCV::LR_D_AQ;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::LR_D_AQRL;
  }
}

static unsigned getSCForRMW64(AtomicOrdering Ordering,
                              const RISCVSubtarget *Subtarget) {
  switch (Ordering) {
  default:
    llvm_unreachable("Unexpected AtomicOrdering");
  case AtomicOrdering::Monotonic:
    return RISCV::SC_D;
  case AtomicOrdering::Acquire:
    return RISCV::SC_D;
  case AtomicOrdering::Release:
    if (Subtarget->hasStdExtZtso())
      return RISCV::SC_D;
    return RISCV::SC_D_RL;
  case AtomicOrdering::AcquireRelease:
    if (Subtarget->hasStdExtZtso())
      return RISCV::SC_D;
    return RISCV::SC_D_RL;
  case AtomicOrdering::SequentiallyConsistent:
    return RISCV::SC_D_RL;
  }
}

static unsigned getLRForRMW(AtomicOrdering Ordering, int Width,
                            const RISCVSubtarget *Subtarget) {
  if (Width == 32)
    return getLRForRMW32(Ordering, Subtarget);
  if (Width == 64)
    return getLRForRMW64(Ordering, Subtarget);
  llvm_unreachable("Unexpected LR width\n");
}

static unsigned getSCForRMW(AtomicOrdering Ordering, int Width,
                            const RISCVSubtarget *Subtarget) {
  if (Width == 32)
    return getSCForRMW32(Ordering, Subtarget);
  if (Width == 64)
    return getSCForRMW64(Ordering, Subtarget);
  llvm_unreachable("Unexpected SC width\n");
}

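// Explanatory note: expands an unmasked read-modify-write pseudo into a single
// LR/SC loop block. The pseudo's operands, as read back below, are: (0) the
// destination register, (1) a scratch register, (2) the address, (3) the
// incoming operand value, and (4) an immediate encoding the AtomicOrdering.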
static void doAtomicBinOpExpansion(const RISCVInstrInfo *TII, MachineInstr &MI,
                                   DebugLoc DL, MachineBasicBlock *ThisMBB,
                                   MachineBasicBlock *LoopMBB,
                                   MachineBasicBlock *DoneMBB,
                                   AtomicRMWInst::BinOp BinOp, int Width,
                                   const RISCVSubtarget *STI) {
  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register IncrReg = MI.getOperand(3).getReg();
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(4).getImm());

  // .loop:
  //   lr.[w|d] dest, (addr)
  //   binop scratch, dest, val
  //   sc.[w|d] scratch, scratch, (addr)
  //   bnez scratch, loop
  BuildMI(LoopMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)), DestReg)
      .addReg(AddrReg);
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Xchg:
    BuildMI(LoopMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
        .addReg(IncrReg)
        .addImm(0);
    break;
  case AtomicRMWInst::Add:
    BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Sub:
    BuildMI(LoopMBB, DL, TII->get(RISCV::SUB), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::And:
    BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Or:
    BuildMI(LoopMBB, DL, TII->get(RISCV::OR), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Xor:
    BuildMI(LoopMBB, DL, TII->get(RISCV::XOR), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Nand:
    BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
        .addReg(ScratchReg)
        .addImm(-1);
    break;
  case AtomicRMWInst::Max:
    BuildMI(LoopMBB, DL, TII->get(RISCV::MAX), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Min:
    BuildMI(LoopMBB, DL, TII->get(RISCV::MIN), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::UMax:
    BuildMI(LoopMBB, DL, TII->get(RISCV::MAXU), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::UMin:
    BuildMI(LoopMBB, DL, TII->get(RISCV::MINU), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  }
  BuildMI(LoopMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)), ScratchReg)
      .addReg(ScratchReg)
      .addReg(AddrReg);
  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(ScratchReg)
      .addReg(RISCV::X0)
      .addMBB(LoopMBB);
}

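// A brief illustration of the masked-merge identity used below: for
//   r = oldval ^ ((oldval ^ newval) & mask)
// every bit position where the mask is 1 receives the corresponding newval
// bit, and every position where the mask is 0 keeps the oldval bit, so only
// the lanes selected by the mask are rewritten.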
static void insertMaskedMerge(const RISCVInstrInfo *TII, DebugLoc DL,
                              MachineBasicBlock *MBB, Register DestReg,
                              Register OldValReg, Register NewValReg,
                              Register MaskReg, Register ScratchReg) {
  assert(OldValReg != ScratchReg && "OldValReg and ScratchReg must be unique");
  assert(OldValReg != MaskReg && "OldValReg and MaskReg must be unique");
  assert(ScratchReg != MaskReg && "ScratchReg and MaskReg must be unique");

  // We select bits from newval and oldval using:
  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge
  // r = oldval ^ ((oldval ^ newval) & masktargetdata);
  BuildMI(MBB, DL, TII->get(RISCV::XOR), ScratchReg)
      .addReg(OldValReg)
      .addReg(NewValReg);
  BuildMI(MBB, DL, TII->get(RISCV::AND), ScratchReg)
      .addReg(ScratchReg)
      .addReg(MaskReg);
  BuildMI(MBB, DL, TII->get(RISCV::XOR), DestReg)
      .addReg(OldValReg)
      .addReg(ScratchReg);
}

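// Explanatory note (assumed provenance): this masked form is used when a
// narrower-than-word atomic has been widened to an aligned 32-bit word earlier
// in lowering, hence the Width == 32 assert below. The LR/SC loop operates on
// the whole word and the masked merge keeps the bits outside the accessed
// field unchanged. Operand layout here is (0) dest, (1) scratch, (2) aligned
// address, (3) pre-shifted increment, (4) mask, (5) ordering immediate.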
static void doMaskedAtomicBinOpExpansion(const RISCVInstrInfo *TII,
                                         MachineInstr &MI, DebugLoc DL,
                                         MachineBasicBlock *ThisMBB,
                                         MachineBasicBlock *LoopMBB,
                                         MachineBasicBlock *DoneMBB,
                                         AtomicRMWInst::BinOp BinOp, int Width,
                                         const RISCVSubtarget *STI) {
  assert(Width == 32 && "Should never need to expand masked 64-bit operations");
  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register IncrReg = MI.getOperand(3).getReg();
  Register MaskReg = MI.getOperand(4).getReg();
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(5).getImm());

  // .loop:
  //   lr.w destreg, (alignedaddr)
  //   binop scratch, destreg, incr
  //   xor scratch, destreg, scratch
  //   and scratch, scratch, masktargetdata
  //   xor scratch, destreg, scratch
  //   sc.w scratch, scratch, (alignedaddr)
  //   bnez scratch, loop
  BuildMI(LoopMBB, DL, TII->get(getLRForRMW32(Ordering, STI)), DestReg)
      .addReg(AddrReg);
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Xchg:
    BuildMI(LoopMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
        .addReg(IncrReg)
        .addImm(0);
    break;
  case AtomicRMWInst::Add:
    BuildMI(LoopMBB, DL, TII->get(RISCV::ADD), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Sub:
    BuildMI(LoopMBB, DL, TII->get(RISCV::SUB), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    break;
  case AtomicRMWInst::Nand:
    BuildMI(LoopMBB, DL, TII->get(RISCV::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(IncrReg);
    BuildMI(LoopMBB, DL, TII->get(RISCV::XORI), ScratchReg)
        .addReg(ScratchReg)
        .addImm(-1);
    break;
  }

  insertMaskedMerge(TII, DL, LoopMBB, ScratchReg, DestReg, ScratchReg, MaskReg,
                    ScratchReg);

  BuildMI(LoopMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), ScratchReg)
      .addReg(ScratchReg)
      .addReg(AddrReg);
  BuildMI(LoopMBB, DL, TII->get(RISCV::BNE))
      .addReg(ScratchReg)
      .addReg(RISCV::X0)
      .addMBB(LoopMBB);
}

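// Explanatory note: rewrites one AMO pseudo into the control flow
//
//   MBB -> LoopMBB (LR; binop; SC; bnez back to LoopMBB) -> DoneMBB
//
// where DoneMBB receives the remaining instructions of the original block.
// Live-in information for the new blocks is recomputed afterwards so later
// passes see correct liveness.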
bool RISCVExpandAtomicPseudo::expandAtomicBinOp(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();

  MachineFunction *MF = MBB.getParent();
  auto LoopMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopMBB);
  MF->insert(++LoopMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopMBB->addSuccessor(LoopMBB);
  LoopMBB->addSuccessor(DoneMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopMBB);

  if (!IsMasked)
    doAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp, Width,
                           STI);
  else
    doMaskedAtomicBinOpExpansion(TII, MI, DL, &MBB, LoopMBB, DoneMBB, BinOp,
                                 Width, STI);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

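// Explanatory note: sign-extends the field held in ValReg in place by shifting
// left and then arithmetic-shifting right by ShamtReg bits, so the loaded
// sub-word value can be compared as a signed quantity.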
static void insertSext(const RISCVInstrInfo *TII, DebugLoc DL,
                       MachineBasicBlock *MBB, Register ValReg,
                       Register ShamtReg) {
  BuildMI(MBB, DL, TII->get(RISCV::SLL), ValReg)
      .addReg(ValReg)
      .addReg(ShamtReg);
  BuildMI(MBB, DL, TII->get(RISCV::SRA), ValReg)
      .addReg(ValReg)
      .addReg(ShamtReg);
}

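// Explanatory note: min/max operations need a conditional update, so the loop
// is split into three blocks: LoopHeadMBB performs the LR and the comparison,
// LoopIfBodyMBB replaces the scratch value with the incoming operand when an
// update is needed, and LoopTailMBB performs the SC and branches back on
// failure.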
static void doAtomicMinMaxOpExpansion(
    const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
    MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopHeadMBB,
    MachineBasicBlock *LoopIfBodyMBB, MachineBasicBlock *LoopTailMBB,
    MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width,
    const RISCVSubtarget *STI) {
  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register IncrReg = MI.getOperand(3).getReg();
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(4).getImm());

  // .loophead:
  //   lr.[w|d] dest, (addr)
  //   mv scratch, dest
  //   ifnochangeneeded scratch, incr, .looptail
  BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)), DestReg)
      .addReg(AddrReg);
  BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
      .addReg(DestReg)
      .addImm(0);
  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Max: {
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
        .addReg(ScratchReg)
        .addReg(IncrReg)
        .addMBB(LoopTailMBB);
    break;
  }
  case AtomicRMWInst::Min: {
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
        .addReg(IncrReg)
        .addReg(ScratchReg)
        .addMBB(LoopTailMBB);
    break;
  }
  case AtomicRMWInst::UMax:
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
        .addReg(ScratchReg)
        .addReg(IncrReg)
        .addMBB(LoopTailMBB);
    break;
  case AtomicRMWInst::UMin:
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
        .addReg(IncrReg)
        .addReg(ScratchReg)
        .addMBB(LoopTailMBB);
    break;
  }

  // .loopifbody:
  //   mv scratch, incr
  BuildMI(LoopIfBodyMBB, DL, TII->get(RISCV::ADDI), ScratchReg)
      .addReg(IncrReg)
      .addImm(0);

  // .looptail:
  //   sc.[w|d] scratch, scratch, (addr)
  //   bnez scratch, loop
  BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
          ScratchReg)
      .addReg(ScratchReg)
      .addReg(AddrReg);
  BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
      .addReg(ScratchReg)
      .addReg(RISCV::X0)
      .addMBB(LoopHeadMBB);
}

static void doMaskedAtomicMinMaxOpExpansion(
    const RISCVInstrInfo *TII, MachineInstr &MI, DebugLoc DL,
    MachineBasicBlock *ThisMBB, MachineBasicBlock *LoopHeadMBB,
    MachineBasicBlock *LoopIfBodyMBB, MachineBasicBlock *LoopTailMBB,
    MachineBasicBlock *DoneMBB, AtomicRMWInst::BinOp BinOp, int Width,
    const RISCVSubtarget *STI) {
  assert(Width == 32 && "Should never need to expand masked 64-bit operations");
  Register DestReg = MI.getOperand(0).getReg();
  Register Scratch1Reg = MI.getOperand(1).getReg();
  Register Scratch2Reg = MI.getOperand(2).getReg();
  Register AddrReg = MI.getOperand(3).getReg();
  Register IncrReg = MI.getOperand(4).getReg();
  Register MaskReg = MI.getOperand(5).getReg();
  bool IsSigned = BinOp == AtomicRMWInst::Min || BinOp == AtomicRMWInst::Max;
  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(IsSigned ? 7 : 6).getImm());

  // .loophead:
  //   lr.w destreg, (alignedaddr)
  //   and scratch2, destreg, mask
  //   mv scratch1, destreg
  //   [sext scratch2 if signed min/max]
  //   ifnochangeneeded scratch2, incr, .looptail
  BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW32(Ordering, STI)), DestReg)
      .addReg(AddrReg);
  BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), Scratch2Reg)
      .addReg(DestReg)
      .addReg(MaskReg);
  BuildMI(LoopHeadMBB, DL, TII->get(RISCV::ADDI), Scratch1Reg)
      .addReg(DestReg)
      .addImm(0);

  switch (BinOp) {
  default:
    llvm_unreachable("Unexpected AtomicRMW BinOp");
  case AtomicRMWInst::Max: {
    insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
        .addReg(Scratch2Reg)
        .addReg(IncrReg)
        .addMBB(LoopTailMBB);
    break;
  }
  case AtomicRMWInst::Min: {
    insertSext(TII, DL, LoopHeadMBB, Scratch2Reg, MI.getOperand(6).getReg());
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGE))
        .addReg(IncrReg)
        .addReg(Scratch2Reg)
        .addMBB(LoopTailMBB);
    break;
  }
  case AtomicRMWInst::UMax:
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
        .addReg(Scratch2Reg)
        .addReg(IncrReg)
        .addMBB(LoopTailMBB);
    break;
  case AtomicRMWInst::UMin:
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BGEU))
        .addReg(IncrReg)
        .addReg(Scratch2Reg)
        .addMBB(LoopTailMBB);
    break;
  }

  // .loopifbody:
  //   xor scratch1, destreg, incr
  //   and scratch1, scratch1, mask
  //   xor scratch1, destreg, scratch1
  insertMaskedMerge(TII, DL, LoopIfBodyMBB, Scratch1Reg, DestReg, IncrReg,
                    MaskReg, Scratch1Reg);

  // .looptail:
  //   sc.w scratch1, scratch1, (addr)
  //   bnez scratch1, loop
  BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW32(Ordering, STI)), Scratch1Reg)
      .addReg(Scratch1Reg)
      .addReg(AddrReg);
  BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
      .addReg(Scratch1Reg)
      .addReg(RISCV::X0)
      .addMBB(LoopHeadMBB);
}

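// Explanatory note: builds the four-block CFG used by the expansions above
// (loop head, if-body, loop tail, done). When the subtarget guarantees forward
// progress for LR/SC loops that contain extra ALU instructions
// (hasPermissiveZalrsc) and provides Zbb's MIN/MAX/MINU/MAXU, the simpler
// single-block binop expansion is used instead.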
bool RISCVExpandAtomicPseudo::expandAtomicMinMaxOp(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    AtomicRMWInst::BinOp BinOp, bool IsMasked, int Width,
    MachineBasicBlock::iterator &NextMBBI) {
  // Using MIN(U)/MAX(U) is preferable if permitted.
  if (STI->hasPermissiveZalrsc() && STI->hasStdExtZbb() && !IsMasked)
    return expandAtomicBinOp(MBB, MBBI, BinOp, IsMasked, Width, NextMBBI);

  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = MBB.getParent();
  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopIfBodyMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopHeadMBB);
  MF->insert(++LoopHeadMBB->getIterator(), LoopIfBodyMBB);
  MF->insert(++LoopIfBodyMBB->getIterator(), LoopTailMBB);
  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopHeadMBB->addSuccessor(LoopIfBodyMBB);
  LoopHeadMBB->addSuccessor(LoopTailMBB);
  LoopIfBodyMBB->addSuccessor(LoopTailMBB);
  LoopTailMBB->addSuccessor(LoopHeadMBB);
  LoopTailMBB->addSuccessor(DoneMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopHeadMBB);

  if (!IsMasked)
    doAtomicMinMaxOpExpansion(TII, MI, DL, &MBB, LoopHeadMBB, LoopIfBodyMBB,
                              LoopTailMBB, DoneMBB, BinOp, Width, STI);
  else
    doMaskedAtomicMinMaxOpExpansion(TII, MI, DL, &MBB, LoopHeadMBB,
                                    LoopIfBodyMBB, LoopTailMBB, DoneMBB, BinOp,
                                    Width, STI);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
  computeAndAddLiveIns(LiveRegs, *LoopIfBodyMBB);
  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

// If a BNE on the cmpxchg comparison result immediately follows the cmpxchg
// operation, it can be folded into the cmpxchg expansion by modifying the
// branch within 'LoopHead' (which performs the same comparison). This is a
// valid transformation because after altering the LoopHead's BNE destination,
// the BNE following the cmpxchg becomes redundant and can be deleted. In the
// case of a masked cmpxchg, an appropriate AND and BNE must be matched.
//
// On success, returns true and deletes the matching BNE or AND+BNE, sets the
// LoopHeadBNETarget argument to the target that should be used within the
// loop head, and removes that block as a successor to MBB.
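//
// For example, a sequence along the lines of
//   PseudoCmpXchg32 dest, scratch, addr, cmpval, newval, ordering
//   bne dest, cmpval, .Lfail
// is expanded so that the BNE inside the generated loop head branches straight
// to .Lfail and the trailing BNE is erased (the block name is purely
// illustrative).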
bool tryToFoldBNEOnCmpXchgResult(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 Register DestReg, Register CmpValReg,
                                 Register MaskReg,
                                 MachineBasicBlock *&LoopHeadBNETarget) {
  SmallVector<MachineInstr *> ToErase;
  auto E = MBB.end();
  if (MBBI == E)
    return false;
  MBBI = skipDebugInstructionsForward(MBBI, E);

  // If we have a masked cmpxchg, match AND dst, DestReg, MaskReg.
  if (MaskReg.isValid()) {
    if (MBBI == E || MBBI->getOpcode() != RISCV::AND)
      return false;
    Register ANDOp1 = MBBI->getOperand(1).getReg();
    Register ANDOp2 = MBBI->getOperand(2).getReg();
    if (!(ANDOp1 == DestReg && ANDOp2 == MaskReg) &&
        !(ANDOp1 == MaskReg && ANDOp2 == DestReg))
      return false;
    // We now expect the BNE to use the result of the AND as an operand.
    DestReg = MBBI->getOperand(0).getReg();
    ToErase.push_back(&*MBBI);
    MBBI = skipDebugInstructionsForward(std::next(MBBI), E);
  }

  // Match BNE DestReg, CmpValReg.
  if (MBBI == E || MBBI->getOpcode() != RISCV::BNE)
    return false;
  Register BNEOp0 = MBBI->getOperand(0).getReg();
  Register BNEOp1 = MBBI->getOperand(1).getReg();
  if (!(BNEOp0 == DestReg && BNEOp1 == CmpValReg) &&
      !(BNEOp0 == CmpValReg && BNEOp1 == DestReg))
    return false;

  // Make sure the branch is the only user of the AND.
  if (MaskReg.isValid()) {
    if (BNEOp0 == DestReg && !MBBI->getOperand(0).isKill())
      return false;
    if (BNEOp1 == DestReg && !MBBI->getOperand(1).isKill())
      return false;
  }

  ToErase.push_back(&*MBBI);
  LoopHeadBNETarget = MBBI->getOperand(2).getMBB();
  MBBI = skipDebugInstructionsForward(std::next(MBBI), E);
  if (MBBI != E)
    return false;

  MBB.removeSuccessor(LoopHeadBNETarget);
  for (auto *MI : ToErase)
    MI->eraseFromParent();
  return true;
}

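// Explanatory note: expands a cmpxchg pseudo into
//
//   LoopHeadMBB: LR of the current value, BNE against cmpval exits the loop
//   LoopTailMBB: SC of the new value, BNEZ retries on SC failure
//   DoneMBB:     the remainder of the original block
//
// When a following BNE on the result has been folded (see above), the
// loop-head branch targets that BNE's destination instead of DoneMBB.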
bool RISCVExpandAtomicPseudo::expandAtomicCmpXchg(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, bool IsMasked,
    int Width, MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineFunction *MF = MBB.getParent();
  auto LoopHeadMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto LoopTailMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneMBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  Register DestReg = MI.getOperand(0).getReg();
  Register ScratchReg = MI.getOperand(1).getReg();
  Register AddrReg = MI.getOperand(2).getReg();
  Register CmpValReg = MI.getOperand(3).getReg();
  Register NewValReg = MI.getOperand(4).getReg();
  Register MaskReg = IsMasked ? MI.getOperand(5).getReg() : Register();

  MachineBasicBlock *LoopHeadBNETarget = DoneMBB;
  tryToFoldBNEOnCmpXchgResult(MBB, std::next(MBBI), DestReg, CmpValReg, MaskReg,
                              LoopHeadBNETarget);

  // Insert new MBBs.
  MF->insert(++MBB.getIterator(), LoopHeadMBB);
  MF->insert(++LoopHeadMBB->getIterator(), LoopTailMBB);
  MF->insert(++LoopTailMBB->getIterator(), DoneMBB);

  // Set up successors and transfer remaining instructions to DoneMBB.
  LoopHeadMBB->addSuccessor(LoopTailMBB);
  LoopHeadMBB->addSuccessor(LoopHeadBNETarget);
  LoopTailMBB->addSuccessor(DoneMBB);
  LoopTailMBB->addSuccessor(LoopHeadMBB);
  DoneMBB->splice(DoneMBB->end(), &MBB, MI, MBB.end());
  DoneMBB->transferSuccessors(&MBB);
  MBB.addSuccessor(LoopHeadMBB);

  AtomicOrdering Ordering =
      static_cast<AtomicOrdering>(MI.getOperand(IsMasked ? 6 : 5).getImm());

  if (!IsMasked) {
    // .loophead:
    //   lr.[w|d] dest, (addr)
    //   bne dest, cmpval, done
    BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)),
            DestReg)
        .addReg(AddrReg);
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
        .addReg(DestReg)
        .addReg(CmpValReg)
        .addMBB(LoopHeadBNETarget);
    // .looptail:
    //   sc.[w|d] scratch, newval, (addr)
    //   bnez scratch, loophead
    BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
            ScratchReg)
        .addReg(NewValReg)
        .addReg(AddrReg);
    BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
        .addReg(ScratchReg)
        .addReg(RISCV::X0)
        .addMBB(LoopHeadMBB);
  } else {
    // .loophead:
    //   lr.w dest, (addr)
    //   and scratch, dest, mask
    //   bne scratch, cmpval, done
    Register MaskReg = MI.getOperand(5).getReg();
    BuildMI(LoopHeadMBB, DL, TII->get(getLRForRMW(Ordering, Width, STI)),
            DestReg)
        .addReg(AddrReg);
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::AND), ScratchReg)
        .addReg(DestReg)
        .addReg(MaskReg);
    BuildMI(LoopHeadMBB, DL, TII->get(RISCV::BNE))
        .addReg(ScratchReg)
        .addReg(CmpValReg)
        .addMBB(LoopHeadBNETarget);

    // .looptail:
    //   xor scratch, dest, newval
    //   and scratch, scratch, mask
    //   xor scratch, dest, scratch
    //   sc.w scratch, scratch, (addr)
    //   bnez scratch, loophead
    insertMaskedMerge(TII, DL, LoopTailMBB, ScratchReg, DestReg, NewValReg,
                      MaskReg, ScratchReg);
    BuildMI(LoopTailMBB, DL, TII->get(getSCForRMW(Ordering, Width, STI)),
            ScratchReg)
        .addReg(ScratchReg)
        .addReg(AddrReg);
    BuildMI(LoopTailMBB, DL, TII->get(RISCV::BNE))
        .addReg(ScratchReg)
        .addReg(RISCV::X0)
        .addMBB(LoopHeadMBB);
  }

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *LoopHeadMBB);
  computeAndAddLiveIns(LiveRegs, *LoopTailMBB);
  computeAndAddLiveIns(LiveRegs, *DoneMBB);

  return true;
}

} // end of anonymous namespace

INITIALIZE_PASS(RISCVExpandAtomicPseudo, "riscv-expand-atomic-pseudo",
                RISCV_EXPAND_ATOMIC_PSEUDO_NAME, false, false)

FunctionPass *llvm::createRISCVExpandAtomicPseudoPass() {
  return new RISCVExpandAtomicPseudo();
}