LLVM 20.0.0git
AtomicExpandPass.cpp
Go to the documentation of this file.
1//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass (at IR level) to replace atomic instructions with
10// __atomic_* library calls, or with target-specific instructions which
11// implement the same semantics in a way which better fits the target backend.
12// This can include the use of (intrinsic-based) load-linked/store-conditional
13// loops, AtomicCmpXchg, or type coercions.
14//
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/ArrayRef.h"
29#include "llvm/IR/Attributes.h"
30#include "llvm/IR/BasicBlock.h"
31#include "llvm/IR/Constant.h"
32#include "llvm/IR/Constants.h"
33#include "llvm/IR/DataLayout.h"
35#include "llvm/IR/Function.h"
36#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/Instruction.h"
40#include "llvm/IR/MDBuilder.h"
42#include "llvm/IR/Module.h"
43#include "llvm/IR/Type.h"
44#include "llvm/IR/User.h"
45#include "llvm/IR/Value.h"
47#include "llvm/Pass.h"
50#include "llvm/Support/Debug.h"
55#include <cassert>
56#include <cstdint>
57#include <iterator>
58
59using namespace llvm;
60
61#define DEBUG_TYPE "atomic-expand"
62
63namespace {
64
65class AtomicExpandImpl {
66 const TargetLowering *TLI = nullptr;
67 const DataLayout *DL = nullptr;
68
69private:
70 bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
71 IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
72 LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
73 bool tryExpandAtomicLoad(LoadInst *LI);
74 bool expandAtomicLoadToLL(LoadInst *LI);
75 bool expandAtomicLoadToCmpXchg(LoadInst *LI);
76 StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
77 bool tryExpandAtomicStore(StoreInst *SI);
78 void expandAtomicStore(StoreInst *SI);
79 bool tryExpandAtomicRMW(AtomicRMWInst *AI);
80 AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
81 Value *
82 insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
83 Align AddrAlign, AtomicOrdering MemOpOrder,
84 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
85 void expandAtomicOpToLLSC(
86 Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
87 AtomicOrdering MemOpOrder,
88 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
89 void expandPartwordAtomicRMW(
91 AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
92 bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
93 void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
94 void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
95
96 AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
97 static Value *insertRMWCmpXchgLoop(
98 IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
99 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
100 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
101 CreateCmpXchgInstFun CreateCmpXchg);
102 bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
103
104 bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
105 bool isIdempotentRMW(AtomicRMWInst *RMWI);
106 bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
107
108 bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
109 Value *PointerOperand, Value *ValueOperand,
110 Value *CASExpected, AtomicOrdering Ordering,
111 AtomicOrdering Ordering2,
112 ArrayRef<RTLIB::Libcall> Libcalls);
113 void expandAtomicLoadToLibcall(LoadInst *LI);
114 void expandAtomicStoreToLibcall(StoreInst *LI);
115 void expandAtomicRMWToLibcall(AtomicRMWInst *I);
116 void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
117
118 friend bool
120 CreateCmpXchgInstFun CreateCmpXchg);
121
122public:
123 bool run(Function &F, const TargetMachine *TM);
124};
125
126class AtomicExpandLegacy : public FunctionPass {
127public:
128 static char ID; // Pass identification, replacement for typeid
129
130 AtomicExpandLegacy() : FunctionPass(ID) {
132 }
133
134 bool runOnFunction(Function &F) override;
135};
136
137// IRBuilder to be used for replacement atomic instructions.
138struct ReplacementIRBuilder
139 : IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> {
140 MDNode *MMRAMD = nullptr;
141
142 // Preserves the DebugLoc from I, and preserves still valid metadata.
143 // Enable StrictFP builder mode when appropriate.
144 explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
145 : IRBuilder(I->getContext(), InstSimplifyFolder(DL),
147 [this](Instruction *I) { addMMRAMD(I); })) {
149 this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
150 if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
151 this->setIsFPConstrained(true);
152
153 MMRAMD = I->getMetadata(LLVMContext::MD_mmra);
154 }
155
156 void addMMRAMD(Instruction *I) {
158 I->setMetadata(LLVMContext::MD_mmra, MMRAMD);
159 }
160};
161
162} // end anonymous namespace
163
164char AtomicExpandLegacy::ID = 0;
165
166char &llvm::AtomicExpandID = AtomicExpandLegacy::ID;
167
169 "Expand Atomic instructions", false, false)
171INITIALIZE_PASS_END(AtomicExpandLegacy, DEBUG_TYPE,
172 "Expand Atomic instructions", false, false)
173
174// Helper functions to retrieve the size of atomic instructions.
175static unsigned getAtomicOpSize(LoadInst *LI) {
176 const DataLayout &DL = LI->getDataLayout();
177 return DL.getTypeStoreSize(LI->getType());
178}
179
180static unsigned getAtomicOpSize(StoreInst *SI) {
181 const DataLayout &DL = SI->getDataLayout();
182 return DL.getTypeStoreSize(SI->getValueOperand()->getType());
183}
184
185static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
186 const DataLayout &DL = RMWI->getDataLayout();
187 return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
188}
189
190static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
191 const DataLayout &DL = CASI->getDataLayout();
192 return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
193}
194
195// Determine if a particular atomic operation has a supported size,
196// and is of appropriate alignment, to be passed through for target
197// lowering. (Versus turning into a __atomic libcall)
198template <typename Inst>
199static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
200 unsigned Size = getAtomicOpSize(I);
201 Align Alignment = I->getAlign();
202 return Alignment >= Size &&
204}
205
206bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM) {
207 const auto *Subtarget = TM->getSubtargetImpl(F);
208 if (!Subtarget->enableAtomicExpand())
209 return false;
210 TLI = Subtarget->getTargetLowering();
211 DL = &F.getDataLayout();
212
214
215 // Changing control-flow while iterating through it is a bad idea, so gather a
216 // list of all atomic instructions before we start.
217 for (Instruction &I : instructions(F))
218 if (I.isAtomic() && !isa<FenceInst>(&I))
219 AtomicInsts.push_back(&I);
220
221 bool MadeChange = false;
222 for (auto *I : AtomicInsts) {
223 auto LI = dyn_cast<LoadInst>(I);
224 auto SI = dyn_cast<StoreInst>(I);
225 auto RMWI = dyn_cast<AtomicRMWInst>(I);
226 auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
227 assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");
228
229 // If the Size/Alignment is not supported, replace with a libcall.
230 if (LI) {
231 if (!atomicSizeSupported(TLI, LI)) {
232 expandAtomicLoadToLibcall(LI);
233 MadeChange = true;
234 continue;
235 }
236 } else if (SI) {
237 if (!atomicSizeSupported(TLI, SI)) {
238 expandAtomicStoreToLibcall(SI);
239 MadeChange = true;
240 continue;
241 }
242 } else if (RMWI) {
243 if (!atomicSizeSupported(TLI, RMWI)) {
244 expandAtomicRMWToLibcall(RMWI);
245 MadeChange = true;
246 continue;
247 }
248 } else if (CASI) {
249 if (!atomicSizeSupported(TLI, CASI)) {
250 expandAtomicCASToLibcall(CASI);
251 MadeChange = true;
252 continue;
253 }
254 }
255
256 if (LI && TLI->shouldCastAtomicLoadInIR(LI) ==
257 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
258 I = LI = convertAtomicLoadToIntegerType(LI);
259 MadeChange = true;
260 } else if (SI &&
261 TLI->shouldCastAtomicStoreInIR(SI) ==
262 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
263 I = SI = convertAtomicStoreToIntegerType(SI);
264 MadeChange = true;
265 } else if (RMWI &&
266 TLI->shouldCastAtomicRMWIInIR(RMWI) ==
267 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
268 I = RMWI = convertAtomicXchgToIntegerType(RMWI);
269 MadeChange = true;
270 } else if (CASI) {
271 // TODO: when we're ready to make the change at the IR level, we can
272 // extend convertCmpXchgToInteger for floating point too.
273 if (CASI->getCompareOperand()->getType()->isPointerTy()) {
274 // TODO: add a TLI hook to control this so that each target can
275 // convert to lowering the original type one at a time.
276 I = CASI = convertCmpXchgToIntegerType(CASI);
277 MadeChange = true;
278 }
279 }
280
281 if (TLI->shouldInsertFencesForAtomic(I)) {
282 auto FenceOrdering = AtomicOrdering::Monotonic;
283 if (LI && isAcquireOrStronger(LI->getOrdering())) {
284 FenceOrdering = LI->getOrdering();
285 LI->setOrdering(AtomicOrdering::Monotonic);
286 } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
287 FenceOrdering = SI->getOrdering();
288 SI->setOrdering(AtomicOrdering::Monotonic);
289 } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
290 isAcquireOrStronger(RMWI->getOrdering()))) {
291 FenceOrdering = RMWI->getOrdering();
292 RMWI->setOrdering(AtomicOrdering::Monotonic);
293 } else if (CASI &&
294 TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
295 TargetLoweringBase::AtomicExpansionKind::None &&
296 (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
297 isAcquireOrStronger(CASI->getSuccessOrdering()) ||
298 isAcquireOrStronger(CASI->getFailureOrdering()))) {
299 // If a compare and swap is lowered to LL/SC, we can do smarter fence
300 // insertion, with a stronger one on the success path than on the
301 // failure path. As a result, fence insertion is directly done by
302 // expandAtomicCmpXchg in that case.
303 FenceOrdering = CASI->getMergedOrdering();
304 CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
305 CASI->setFailureOrdering(AtomicOrdering::Monotonic);
306 }
307
308 if (FenceOrdering != AtomicOrdering::Monotonic) {
309 MadeChange |= bracketInstWithFences(I, FenceOrdering);
310 }
311 } else if (I->hasAtomicStore() &&
312 TLI->shouldInsertTrailingFenceForAtomicStore(I)) {
313 auto FenceOrdering = AtomicOrdering::Monotonic;
314 if (SI)
315 FenceOrdering = SI->getOrdering();
316 else if (RMWI)
317 FenceOrdering = RMWI->getOrdering();
318 else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) !=
319 TargetLoweringBase::AtomicExpansionKind::LLSC)
320 // LLSC is handled in expandAtomicCmpXchg().
321 FenceOrdering = CASI->getSuccessOrdering();
322
323 IRBuilder Builder(I);
324 if (auto TrailingFence =
325 TLI->emitTrailingFence(Builder, I, FenceOrdering)) {
326 TrailingFence->moveAfter(I);
327 MadeChange = true;
328 }
329 }
330
331 if (LI)
332 MadeChange |= tryExpandAtomicLoad(LI);
333 else if (SI)
334 MadeChange |= tryExpandAtomicStore(SI);
335 else if (RMWI) {
336 // There are two different ways of expanding RMW instructions:
337 // - into a load if it is idempotent
338 // - into a Cmpxchg/LL-SC loop otherwise
339 // we try them in that order.
340
341 if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
342 MadeChange = true;
343 } else {
344 MadeChange |= tryExpandAtomicRMW(RMWI);
345 }
346 } else if (CASI)
347 MadeChange |= tryExpandAtomicCmpXchg(CASI);
348 }
349 return MadeChange;
350}
351
352bool AtomicExpandLegacy::runOnFunction(Function &F) {
353
354 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
355 if (!TPC)
356 return false;
357 auto *TM = &TPC->getTM<TargetMachine>();
358 AtomicExpandImpl AE;
359 return AE.run(F, TM);
360}
361
363 return new AtomicExpandLegacy();
364}
365
368 AtomicExpandImpl AE;
369
370 bool Changed = AE.run(F, TM);
371 if (!Changed)
372 return PreservedAnalyses::all();
373
375}
376
377bool AtomicExpandImpl::bracketInstWithFences(Instruction *I,
378 AtomicOrdering Order) {
379 ReplacementIRBuilder Builder(I, *DL);
380
381 auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
382
383 auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
384 // We have a guard here because not every atomic operation generates a
385 // trailing fence.
386 if (TrailingFence)
387 TrailingFence->moveAfter(I);
388
389 return (LeadingFence || TrailingFence);
390}
391
392/// Get the iX type with the same bitwidth as T.
394AtomicExpandImpl::getCorrespondingIntegerType(Type *T, const DataLayout &DL) {
395 EVT VT = TLI->getMemValueType(DL, T);
396 unsigned BitWidth = VT.getStoreSizeInBits();
397 assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
398 return IntegerType::get(T->getContext(), BitWidth);
399}
400
401/// Convert an atomic load of a non-integral type to an integer load of the
402/// equivalent bitwidth. See the function comment on
403/// convertAtomicStoreToIntegerType for background.
404LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) {
405 auto *M = LI->getModule();
406 Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());
407
408 ReplacementIRBuilder Builder(LI, *DL);
409
411
412 auto *NewLI = Builder.CreateLoad(NewTy, Addr);
413 NewLI->setAlignment(LI->getAlign());
414 NewLI->setVolatile(LI->isVolatile());
415 NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
416 LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
417
418 Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
419 LI->replaceAllUsesWith(NewVal);
420 LI->eraseFromParent();
421 return NewLI;
422}
423
425AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
426 auto *M = RMWI->getModule();
427 Type *NewTy =
428 getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());
429
430 ReplacementIRBuilder Builder(RMWI, *DL);
431
432 Value *Addr = RMWI->getPointerOperand();
433 Value *Val = RMWI->getValOperand();
434 Value *NewVal = Val->getType()->isPointerTy()
435 ? Builder.CreatePtrToInt(Val, NewTy)
436 : Builder.CreateBitCast(Val, NewTy);
437
438 auto *NewRMWI = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Addr, NewVal,
439 RMWI->getAlign(), RMWI->getOrdering(),
440 RMWI->getSyncScopeID());
441 NewRMWI->setVolatile(RMWI->isVolatile());
442 LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
443
444 Value *NewRVal = RMWI->getType()->isPointerTy()
445 ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
446 : Builder.CreateBitCast(NewRMWI, RMWI->getType());
447 RMWI->replaceAllUsesWith(NewRVal);
448 RMWI->eraseFromParent();
449 return NewRMWI;
450}
451
452bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
453 switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
455 return false;
457 expandAtomicOpToLLSC(
458 LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
459 LI->getOrdering(),
460 [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
461 return true;
463 return expandAtomicLoadToLL(LI);
465 return expandAtomicLoadToCmpXchg(LI);
468 return true;
469 default:
470 llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
471 }
472}
473
474bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
475 switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
477 return false;
479 expandAtomicStore(SI);
480 return true;
482 SI->setAtomic(AtomicOrdering::NotAtomic);
483 return true;
484 default:
485 llvm_unreachable("Unhandled case in tryExpandAtomicStore");
486 }
487}
488
489bool AtomicExpandImpl::expandAtomicLoadToLL(LoadInst *LI) {
490 ReplacementIRBuilder Builder(LI, *DL);
491
492 // On some architectures, load-linked instructions are atomic for larger
493 // sizes than normal loads. For example, the only 64-bit load guaranteed
494 // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
495 Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
496 LI->getPointerOperand(), LI->getOrdering());
497 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
498
499 LI->replaceAllUsesWith(Val);
500 LI->eraseFromParent();
501
502 return true;
503}
504
505bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
506 ReplacementIRBuilder Builder(LI, *DL);
507 AtomicOrdering Order = LI->getOrdering();
508 if (Order == AtomicOrdering::Unordered)
510
512 Type *Ty = LI->getType();
513 Constant *DummyVal = Constant::getNullValue(Ty);
514
515 Value *Pair = Builder.CreateAtomicCmpXchg(
516 Addr, DummyVal, DummyVal, LI->getAlign(), Order,
518 Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
519
520 LI->replaceAllUsesWith(Loaded);
521 LI->eraseFromParent();
522
523 return true;
524}
525
526/// Convert an atomic store of a non-integral type to an integer store of the
527/// equivalent bitwidth. We used to not support floating point or vector
528/// atomics in the IR at all. The backends learned to deal with the bitcast
529/// idiom because that was the only way of expressing the notion of a atomic
530/// float or vector store. The long term plan is to teach each backend to
531/// instruction select from the original atomic store, but as a migration
532/// mechanism, we convert back to the old format which the backends understand.
533/// Each backend will need individual work to recognize the new format.
534StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
535 ReplacementIRBuilder Builder(SI, *DL);
536 auto *M = SI->getModule();
537 Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
538 M->getDataLayout());
539 Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
540
541 Value *Addr = SI->getPointerOperand();
542
543 StoreInst *NewSI = Builder.CreateStore(NewVal, Addr);
544 NewSI->setAlignment(SI->getAlign());
545 NewSI->setVolatile(SI->isVolatile());
546 NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
547 LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
548 SI->eraseFromParent();
549 return NewSI;
550}
551
552void AtomicExpandImpl::expandAtomicStore(StoreInst *SI) {
553 // This function is only called on atomic stores that are too large to be
554 // atomic if implemented as a native store. So we replace them by an
555 // atomic swap, that can be implemented for example as a ldrex/strex on ARM
556 // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
557 // It is the responsibility of the target to only signal expansion via
558 // shouldExpandAtomicRMW in cases where this is required and possible.
559 ReplacementIRBuilder Builder(SI, *DL);
560 AtomicOrdering Ordering = SI->getOrdering();
564 : Ordering;
565 AtomicRMWInst *AI = Builder.CreateAtomicRMW(
566 AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
567 SI->getAlign(), RMWOrdering);
568 SI->eraseFromParent();
569
570 // Now we have an appropriate swap instruction, lower it as usual.
571 tryExpandAtomicRMW(AI);
572}
573
575 Value *Loaded, Value *NewVal, Align AddrAlign,
576 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
577 Value *&Success, Value *&NewLoaded) {
578 Type *OrigTy = NewVal->getType();
579
580 // This code can go away when cmpxchg supports FP and vector types.
581 assert(!OrigTy->isPointerTy());
582 bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isVectorTy();
583 if (NeedBitcast) {
584 IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
585 NewVal = Builder.CreateBitCast(NewVal, IntTy);
586 Loaded = Builder.CreateBitCast(Loaded, IntTy);
587 }
588
589 Value *Pair = Builder.CreateAtomicCmpXchg(
590 Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
592 Success = Builder.CreateExtractValue(Pair, 1, "success");
593 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
594
595 if (NeedBitcast)
596 NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
597}
598
599bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
600 LLVMContext &Ctx = AI->getModule()->getContext();
601 TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
602 switch (Kind) {
604 return false;
606 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
607 unsigned ValueSize = getAtomicOpSize(AI);
608 if (ValueSize < MinCASSize) {
609 expandPartwordAtomicRMW(AI,
611 } else {
612 auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
613 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
614 AI->getValOperand());
615 };
616 expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
617 AI->getAlign(), AI->getOrdering(), PerformOp);
618 }
619 return true;
620 }
622 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
623 unsigned ValueSize = getAtomicOpSize(AI);
624 if (ValueSize < MinCASSize) {
625 expandPartwordAtomicRMW(AI,
627 } else {
629 Ctx.getSyncScopeNames(SSNs);
630 auto MemScope = SSNs[AI->getSyncScopeID()].empty()
631 ? "system"
632 : SSNs[AI->getSyncScopeID()];
634 ORE.emit([&]() {
635 return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
636 << "A compare and swap loop was generated for an atomic "
637 << AI->getOperationName(AI->getOperation()) << " operation at "
638 << MemScope << " memory scope";
639 });
641 }
642 return true;
643 }
645 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
646 unsigned ValueSize = getAtomicOpSize(AI);
647 if (ValueSize < MinCASSize) {
649 // Widen And/Or/Xor and give the target another chance at expanding it.
652 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
653 return true;
654 }
655 }
656 expandAtomicRMWToMaskedIntrinsic(AI);
657 return true;
658 }
660 TLI->emitBitTestAtomicRMWIntrinsic(AI);
661 return true;
662 }
664 TLI->emitCmpArithAtomicRMWIntrinsic(AI);
665 return true;
666 }
668 return lowerAtomicRMWInst(AI);
670 TLI->emitExpandAtomicRMW(AI);
671 return true;
672 default:
673 llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
674 }
675}
676
677namespace {
678
// Bundle of types and IR values describing how a narrow ("partword") value is
// embedded in a wider word, as produced by createMaskInstrs and consumed by
// extractMaskedValue / insertMaskedValue.
679struct PartwordMaskValues {
680 // These five fields are always set by createMaskInstrs.
 // Type of the wide word: an integer of the minimum word size when the value
 // is narrower than that, otherwise the value type itself.
681 Type *WordType = nullptr;
 // Original type of the value being operated on.
682 Type *ValueType = nullptr;
 // ValueType, or the integer type of the same primitive bit size when
 // ValueType is a floating-point or vector type.
683 Type *IntValueType = nullptr;
 // Address of the wide word containing the value.
684 Value *AlignedAddr = nullptr;
 // Alignment that goes with AlignedAddr.
685 Align AlignedAddrAlignment;
686 // The remaining fields can be null.
 // Shift that positions the value inside the wide word.
687 Value *ShiftAmt = nullptr;
 // Mask selecting the value's bits inside the wide word.
688 Value *Mask = nullptr;
 // Inverse of Mask; createMaskInstrs leaves this null when ValueType equals
 // WordType.
689 Value *Inv_Mask = nullptr;
690};
691
693raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
694 auto PrintObj = [&O](auto *V) {
695 if (V)
696 O << *V;
697 else
698 O << "nullptr";
699 O << '\n';
700 };
701 O << "PartwordMaskValues {\n";
702 O << " WordType: ";
703 PrintObj(PMV.WordType);
704 O << " ValueType: ";
705 PrintObj(PMV.ValueType);
706 O << " AlignedAddr: ";
707 PrintObj(PMV.AlignedAddr);
708 O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
709 O << " ShiftAmt: ";
710 PrintObj(PMV.ShiftAmt);
711 O << " Mask: ";
712 PrintObj(PMV.Mask);
713 O << " Inv_Mask: ";
714 PrintObj(PMV.Inv_Mask);
715 O << "}\n";
716 return O;
717}
718
719} // end anonymous namespace
720
721/// This is a helper function which builds instructions to provide
722/// values necessary for partword atomic operations. It takes an
723/// incoming address, Addr, and ValueType, and constructs the address,
724/// shift-amounts and masks needed to work with a larger value of size
725/// WordSize.
726///
727/// AlignedAddr: Addr rounded down to a multiple of WordSize
728///
729/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
730/// from AlignAddr for it to have the same value as if
731/// ValueType was loaded from Addr.
732///
733/// Mask: Value to mask with the value loaded from AlignAddr to
734/// include only the part that would've been loaded from Addr.
735///
736/// Inv_Mask: The inverse of Mask.
737static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
739 Value *Addr, Align AddrAlign,
740 unsigned MinWordSize) {
741 PartwordMaskValues PMV;
742
743 Module *M = I->getModule();
744 LLVMContext &Ctx = M->getContext();
745 const DataLayout &DL = M->getDataLayout();
746 unsigned ValueSize = DL.getTypeStoreSize(ValueType);
747
748 PMV.ValueType = PMV.IntValueType = ValueType;
749 if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy())
750 PMV.IntValueType =
751 Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());
752
753 PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
754 : ValueType;
755 if (PMV.ValueType == PMV.WordType) {
756 PMV.AlignedAddr = Addr;
757 PMV.AlignedAddrAlignment = AddrAlign;
758 PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
759 PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
760 return PMV;
761 }
762
763 PMV.AlignedAddrAlignment = Align(MinWordSize);
764
765 assert(ValueSize < MinWordSize);
766
767 PointerType *PtrTy = cast<PointerType>(Addr->getType());
768 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
769 Value *PtrLSB;
770
771 if (AddrAlign < MinWordSize) {
772 PMV.AlignedAddr = Builder.CreateIntrinsic(
773 Intrinsic::ptrmask, {PtrTy, IntTy},
774 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
775 "AlignedAddr");
776
777 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
778 PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
779 } else {
780 // If the alignment is high enough, the LSB are known 0.
781 PMV.AlignedAddr = Addr;
782 PtrLSB = ConstantInt::getNullValue(IntTy);
783 }
784
785 if (DL.isLittleEndian()) {
786 // turn bytes into bits
787 PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
788 } else {
789 // turn bytes into bits, and count from the other side.
790 PMV.ShiftAmt = Builder.CreateShl(
791 Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
792 }
793
794 PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
795 PMV.Mask = Builder.CreateShl(
796 ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
797 "Mask");
798
799 PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
800
801 return PMV;
802}
803
804static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
805 const PartwordMaskValues &PMV) {
806 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
807 if (PMV.WordType == PMV.ValueType)
808 return WideWord;
809
810 Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
811 Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
812 return Builder.CreateBitCast(Trunc, PMV.ValueType);
813}
814
815static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
816 Value *Updated, const PartwordMaskValues &PMV) {
817 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
818 assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
819 if (PMV.WordType == PMV.ValueType)
820 return Updated;
821
822 Updated = Builder.CreateBitCast(Updated, PMV.IntValueType);
823
824 Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
825 Value *Shift =
826 Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
827 Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
828 Value *Or = Builder.CreateOr(And, Shift, "inserted");
829 return Or;
830}
831
832/// Emit IR to implement a masked version of a given atomicrmw
833/// operation. (That is, only the bits under the Mask should be
834/// affected by the operation)
836 IRBuilderBase &Builder, Value *Loaded,
837 Value *Shifted_Inc, Value *Inc,
838 const PartwordMaskValues &PMV) {
839 // TODO: update to use
840 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
841 // to merge bits from two values without requiring PMV.Inv_Mask.
842 switch (Op) {
843 case AtomicRMWInst::Xchg: {
844 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
845 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
846 return FinalVal;
847 }
851 llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
854 case AtomicRMWInst::Nand: {
855 // The other arithmetic ops need to be masked into place.
856 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
857 Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
858 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
859 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
860 return FinalVal;
861 }
872 // Finally, other ops will operate on the full value, so truncate down to
873 // the original size, and expand out again after doing the
874 // operation. Bitcasts will be inserted for FP values.
875 Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
876 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
877 Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
878 return FinalVal;
879 }
880 default:
881 llvm_unreachable("Unknown atomic op");
882 }
883}
884
885/// Expand a sub-word atomicrmw operation into an appropriate
886/// word-sized operation.
887///
888/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
889/// way as a typical atomicrmw expansion. The only difference here is
890/// that the operation inside of the loop may operate upon only a
891/// part of the value.
892void AtomicExpandImpl::expandPartwordAtomicRMW(
894 // Widen And/Or/Xor and give the target another chance at expanding it.
898 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
899 return;
900 }
901 AtomicOrdering MemOpOrder = AI->getOrdering();
902 SyncScope::ID SSID = AI->getSyncScopeID();
903
904 ReplacementIRBuilder Builder(AI, *DL);
905
906 PartwordMaskValues PMV =
907 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
908 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
909
910 Value *ValOperand_Shifted = nullptr;
913 Value *ValOp = Builder.CreateBitCast(AI->getValOperand(), PMV.IntValueType);
914 ValOperand_Shifted =
915 Builder.CreateShl(Builder.CreateZExt(ValOp, PMV.WordType), PMV.ShiftAmt,
916 "ValOperand_Shifted");
917 }
918
919 auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
920 return performMaskedAtomicOp(Op, Builder, Loaded, ValOperand_Shifted,
921 AI->getValOperand(), PMV);
922 };
923
924 Value *OldResult;
926 OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
927 PMV.AlignedAddrAlignment, MemOpOrder, SSID,
928 PerformPartwordOp, createCmpXchgInstFun);
929 } else {
931 OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
932 PMV.AlignedAddrAlignment, MemOpOrder,
933 PerformPartwordOp);
934 }
935
936 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
937 AI->replaceAllUsesWith(FinalOldResult);
938 AI->eraseFromParent();
939}
940
941/// Copy metadata that's safe to preserve when widening atomics.
943 const Instruction &Source) {
945 Source.getAllMetadata(MD);
946 LLVMContext &Ctx = Dest.getContext();
947 MDBuilder MDB(Ctx);
948
949 for (auto [ID, N] : MD) {
950 switch (ID) {
951 case LLVMContext::MD_dbg:
952 case LLVMContext::MD_tbaa:
953 case LLVMContext::MD_tbaa_struct:
954 case LLVMContext::MD_alias_scope:
955 case LLVMContext::MD_noalias:
956 case LLVMContext::MD_access_group:
957 case LLVMContext::MD_mmra:
958 Dest.setMetadata(ID, N);
959 break;
960 default:
961 if (ID == Ctx.getMDKindID("amdgpu.no.remote.memory"))
962 Dest.setMetadata(ID, N);
963 else if (ID == Ctx.getMDKindID("amdgpu.no.fine.grained.memory"))
964 Dest.setMetadata(ID, N);
965
966 break;
967 }
968 }
969}
970
971// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
972AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
973 ReplacementIRBuilder Builder(AI, *DL);
975
978 "Unable to widen operation");
979
980 PartwordMaskValues PMV =
981 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
982 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
983
984 Value *ValOperand_Shifted =
985 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
986 PMV.ShiftAmt, "ValOperand_Shifted");
987
988 Value *NewOperand;
989
990 if (Op == AtomicRMWInst::And)
991 NewOperand =
992 Builder.CreateOr(ValOperand_Shifted, PMV.Inv_Mask, "AndOperand");
993 else
994 NewOperand = ValOperand_Shifted;
995
996 AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
997 Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
998 AI->getOrdering(), AI->getSyncScopeID());
999
1000 copyMetadataForAtomic(*NewAI, *AI);
1001
1002 Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
1003 AI->replaceAllUsesWith(FinalOldResult);
1004 AI->eraseFromParent();
1005 return NewAI;
1006}
1007
1008bool AtomicExpandImpl::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
1009 // The basic idea here is that we're expanding a cmpxchg of a
1010 // smaller memory size up to a word-sized cmpxchg. To do this, we
1011 // need to add a retry-loop for strong cmpxchg, so that
1012 // modifications to other parts of the word don't cause a spurious
1013 // failure.
1014
1015 // This generates code like the following:
1016 // [[Setup mask values PMV.*]]
1017 // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
1018 // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
1019 // %InitLoaded = load i32* %addr
1020 // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
1021 // br partword.cmpxchg.loop
1022 // partword.cmpxchg.loop:
1023 // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
1024 // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
1025 // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
1026 // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
1027 // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
1028 // i32 %FullWord_NewVal success_ordering failure_ordering
1029 // %OldVal = extractvalue { i32, i1 } %NewCI, 0
1030 // %Success = extractvalue { i32, i1 } %NewCI, 1
1031 // br i1 %Success, label %partword.cmpxchg.end,
1032 // label %partword.cmpxchg.failure
1033 // partword.cmpxchg.failure:
1034 // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
1035 // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
1036 // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
1037 // label %partword.cmpxchg.end
1038 // partword.cmpxchg.end:
1039 // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
1040 // %FinalOldVal = trunc i32 %tmp1 to i8
1041 // %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
1042 // %Res = insertvalue { i8, i1 } %25, i1 %Success, 1
1043
1044 Value *Addr = CI->getPointerOperand();
1045 Value *Cmp = CI->getCompareOperand();
1046 Value *NewVal = CI->getNewValOperand();
1047
1048 BasicBlock *BB = CI->getParent();
1049 Function *F = BB->getParent();
1050 ReplacementIRBuilder Builder(CI, *DL);
1051 LLVMContext &Ctx = Builder.getContext();
1052
1053 BasicBlock *EndBB =
1054 BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
1055 auto FailureBB =
1056 BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
1057 auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
1058
1059 // The split call above "helpfully" added a branch at the end of BB
1060 // (to the wrong place).
1061 std::prev(BB->end())->eraseFromParent();
1062 Builder.SetInsertPoint(BB);
1063
1064 PartwordMaskValues PMV =
1065 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1066 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1067
1068 // Shift the incoming values over, into the right location in the word.
1069 Value *NewVal_Shifted =
1070 Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
1071 Value *Cmp_Shifted =
1072 Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
1073
1074 // Load the entire current word, and mask into place the expected and new
1075 // values
1076 LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
1077 InitLoaded->setVolatile(CI->isVolatile());
1078 Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
1079 Builder.CreateBr(LoopBB);
1080
1081 // partword.cmpxchg.loop:
1082 Builder.SetInsertPoint(LoopBB);
1083 PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
1084 Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
1085
1086 // Mask/Or the expected and new values into place in the loaded word.
1087 Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
1088 Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
1089 AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
1090 PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
1092 NewCI->setVolatile(CI->isVolatile());
1093 // When we're building a strong cmpxchg, we need a loop, so you
1094 // might think we could use a weak cmpxchg inside. But, using strong
1095 // allows the below comparison for ShouldContinue, and we're
1096 // expecting the underlying cmpxchg to be a machine instruction,
1097 // which is strong anyways.
1098 NewCI->setWeak(CI->isWeak());
1099
1100 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1101 Value *Success = Builder.CreateExtractValue(NewCI, 1);
1102
1103 if (CI->isWeak())
1104 Builder.CreateBr(EndBB);
1105 else
1106 Builder.CreateCondBr(Success, EndBB, FailureBB);
1107
1108 // partword.cmpxchg.failure:
1109 Builder.SetInsertPoint(FailureBB);
1110 // Upon failure, verify that the masked-out part of the loaded value
1111 // has been modified. If it didn't, abort the cmpxchg, since the
1112 // masked-in part must've.
1113 Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
1114 Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
1115 Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
1116
1117 // Add the second value to the phi from above
1118 Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
1119
1120 // partword.cmpxchg.end:
1121 Builder.SetInsertPoint(CI);
1122
1123 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1124 Value *Res = PoisonValue::get(CI->getType());
1125 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1126 Res = Builder.CreateInsertValue(Res, Success, 1);
1127
1128 CI->replaceAllUsesWith(Res);
1129 CI->eraseFromParent();
1130 return true;
1131}
1132
1133void AtomicExpandImpl::expandAtomicOpToLLSC(
1134 Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
1135 AtomicOrdering MemOpOrder,
1136 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1137 ReplacementIRBuilder Builder(I, *DL);
1138 Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
1139 MemOpOrder, PerformOp);
1140
1141 I->replaceAllUsesWith(Loaded);
1142 I->eraseFromParent();
1143}
1144
1145void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
1146 ReplacementIRBuilder Builder(AI, *DL);
1147
1148 PartwordMaskValues PMV =
1149 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1150 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1151
1152 // The value operand must be sign-extended for signed min/max so that the
1153 // target's signed comparison instructions can be used. Otherwise, just
1154 // zero-ext.
1155 Instruction::CastOps CastOp = Instruction::ZExt;
1156 AtomicRMWInst::BinOp RMWOp = AI->getOperation();
1157 if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
1158 CastOp = Instruction::SExt;
1159
1160 Value *ValOperand_Shifted = Builder.CreateShl(
1161 Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
1162 PMV.ShiftAmt, "ValOperand_Shifted");
1163 Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
1164 Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
1165 AI->getOrdering());
1166 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1167 AI->replaceAllUsesWith(FinalOldResult);
1168 AI->eraseFromParent();
1169}
1170
1171void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic(
1172 AtomicCmpXchgInst *CI) {
1173 ReplacementIRBuilder Builder(CI, *DL);
1174
1175 PartwordMaskValues PMV = createMaskInstrs(
1176 Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
1177 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1178
1179 Value *CmpVal_Shifted = Builder.CreateShl(
1180 Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
1181 "CmpVal_Shifted");
1182 Value *NewVal_Shifted = Builder.CreateShl(
1183 Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
1184 "NewVal_Shifted");
1185 Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
1186 Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
1187 CI->getMergedOrdering());
1188 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1189 Value *Res = PoisonValue::get(CI->getType());
1190 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1191 Value *Success = Builder.CreateICmpEQ(
1192 CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
1193 Res = Builder.CreateInsertValue(Res, Success, 1);
1194
1195 CI->replaceAllUsesWith(Res);
1196 CI->eraseFromParent();
1197}
1198
1199Value *AtomicExpandImpl::insertRMWLLSCLoop(
1200 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1201 AtomicOrdering MemOpOrder,
1202 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1203 LLVMContext &Ctx = Builder.getContext();
1204 BasicBlock *BB = Builder.GetInsertBlock();
1205 Function *F = BB->getParent();
1206
1207 assert(AddrAlign >=
1208 F->getDataLayout().getTypeStoreSize(ResultTy) &&
1209 "Expected at least natural alignment at this point.");
1210
1211 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1212 //
1213 // The standard expansion we produce is:
1214 // [...]
1215 // atomicrmw.start:
1216 // %loaded = @load.linked(%addr)
1217 // %new = some_op iN %loaded, %incr
1218 // %stored = @store_conditional(%new, %addr)
1219 // %try_again = icmp i32 ne %stored, 0
1220 // br i1 %try_again, label %loop, label %atomicrmw.end
1221 // atomicrmw.end:
1222 // [...]
1223 BasicBlock *ExitBB =
1224 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1225 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1226
1227 // The split call above "helpfully" added a branch at the end of BB (to the
1228 // wrong place).
1229 std::prev(BB->end())->eraseFromParent();
1230 Builder.SetInsertPoint(BB);
1231 Builder.CreateBr(LoopBB);
1232
1233 // Start the main loop block now that we've taken care of the preliminaries.
1234 Builder.SetInsertPoint(LoopBB);
1235 Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);
1236
1237 Value *NewVal = PerformOp(Builder, Loaded);
1238
1239 Value *StoreSuccess =
1240 TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
1241 Value *TryAgain = Builder.CreateICmpNE(
1242 StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
1243 Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
1244
1245 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1246 return Loaded;
1247}
1248
1249/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
1250/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
1251/// IR. As a migration step, we convert back to what use to be the standard
1252/// way to represent a pointer cmpxchg so that we can update backends one by
1253/// one.
1255AtomicExpandImpl::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1256 auto *M = CI->getModule();
1257 Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
1258 M->getDataLayout());
1259
1260 ReplacementIRBuilder Builder(CI, *DL);
1261
1262 Value *Addr = CI->getPointerOperand();
1263
1264 Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
1265 Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
1266
1267 auto *NewCI = Builder.CreateAtomicCmpXchg(
1268 Addr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
1269 CI->getFailureOrdering(), CI->getSyncScopeID());
1270 NewCI->setVolatile(CI->isVolatile());
1271 NewCI->setWeak(CI->isWeak());
1272 LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1273
1274 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1275 Value *Succ = Builder.CreateExtractValue(NewCI, 1);
1276
1277 OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
1278
1279 Value *Res = PoisonValue::get(CI->getType());
1280 Res = Builder.CreateInsertValue(Res, OldVal, 0);
1281 Res = Builder.CreateInsertValue(Res, Succ, 1);
1282
1283 CI->replaceAllUsesWith(Res);
1284 CI->eraseFromParent();
1285 return NewCI;
1286}
1287
1288bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1289 AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
1290 AtomicOrdering FailureOrder = CI->getFailureOrdering();
1291 Value *Addr = CI->getPointerOperand();
1292 BasicBlock *BB = CI->getParent();
1293 Function *F = BB->getParent();
1294 LLVMContext &Ctx = F->getContext();
1295 // If shouldInsertFencesForAtomic() returns true, then the target does not
1296 // want to deal with memory orders, and emitLeading/TrailingFence should take
1297 // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
1298 // should preserve the ordering.
1299 bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
1300 AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
1302 : CI->getMergedOrdering();
1303
1304 // In implementations which use a barrier to achieve release semantics, we can
1305 // delay emitting this barrier until we know a store is actually going to be
1306 // attempted. The cost of this delay is that we need 2 copies of the block
1307 // emitting the load-linked, affecting code size.
1308 //
1309 // Ideally, this logic would be unconditional except for the minsize check
1310 // since in other cases the extra blocks naturally collapse down to the
1311 // minimal loop. Unfortunately, this puts too much stress on later
1312 // optimisations so we avoid emitting the extra logic in those cases too.
1313 bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
1314 SuccessOrder != AtomicOrdering::Monotonic &&
1315 SuccessOrder != AtomicOrdering::Acquire &&
1316 !F->hasMinSize();
1317
1318 // There's no overhead for sinking the release barrier in a weak cmpxchg, so
1319 // do it even on minsize.
1320 bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();
1321
1322 // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
1323 //
1324 // The full expansion we produce is:
1325 // [...]
1326 // %aligned.addr = ...
1327 // cmpxchg.start:
1328 // %unreleasedload = @load.linked(%aligned.addr)
1329 // %unreleasedload.extract = extract value from %unreleasedload
1330 // %should_store = icmp eq %unreleasedload.extract, %desired
1331 // br i1 %should_store, label %cmpxchg.releasingstore,
1332 // label %cmpxchg.nostore
1333 // cmpxchg.releasingstore:
1334 // fence?
1335 // br label cmpxchg.trystore
1336 // cmpxchg.trystore:
1337 // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
1338 // [%releasedload, %cmpxchg.releasedload]
1339 // %updated.new = insert %new into %loaded.trystore
1340 // %stored = @store_conditional(%updated.new, %aligned.addr)
1341 // %success = icmp eq i32 %stored, 0
1342 // br i1 %success, label %cmpxchg.success,
1343 // label %cmpxchg.releasedload/%cmpxchg.failure
1344 // cmpxchg.releasedload:
1345 // %releasedload = @load.linked(%aligned.addr)
1346 // %releasedload.extract = extract value from %releasedload
1347 // %should_store = icmp eq %releasedload.extract, %desired
1348 // br i1 %should_store, label %cmpxchg.trystore,
1349 // label %cmpxchg.failure
1350 // cmpxchg.success:
1351 // fence?
1352 // br label %cmpxchg.end
1353 // cmpxchg.nostore:
1354 // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
1355 // [%releasedload,
1356 // %cmpxchg.releasedload/%cmpxchg.trystore]
1357 // @load_linked_fail_balance()?
1358 // br label %cmpxchg.failure
1359 // cmpxchg.failure:
1360 // fence?
1361 // br label %cmpxchg.end
1362 // cmpxchg.end:
1363 // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
1364 // [%loaded.trystore, %cmpxchg.trystore]
1365 // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
1366 // %loaded = extract value from %loaded.exit
1367 // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
1368 // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
1369 // [...]
1370 BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
1371 auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
1372 auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
1373 auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
1374 auto ReleasedLoadBB =
1375 BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
1376 auto TryStoreBB =
1377 BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
1378 auto ReleasingStoreBB =
1379 BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
1380 auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
1381
1382 ReplacementIRBuilder Builder(CI, *DL);
1383
1384 // The split call above "helpfully" added a branch at the end of BB (to the
1385 // wrong place), but we might want a fence too. It's easiest to just remove
1386 // the branch entirely.
1387 std::prev(BB->end())->eraseFromParent();
1388 Builder.SetInsertPoint(BB);
1389 if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1390 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1391
1392 PartwordMaskValues PMV =
1393 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1394 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1395 Builder.CreateBr(StartBB);
1396
1397 // Start the main loop block now that we've taken care of the preliminaries.
1398 Builder.SetInsertPoint(StartBB);
1399 Value *UnreleasedLoad =
1400 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1401 Value *UnreleasedLoadExtract =
1402 extractMaskedValue(Builder, UnreleasedLoad, PMV);
1403 Value *ShouldStore = Builder.CreateICmpEQ(
1404 UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");
1405
1406 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1407 // jump straight past that fence instruction (if it exists).
1408 Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);
1409
1410 Builder.SetInsertPoint(ReleasingStoreBB);
1411 if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1412 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1413 Builder.CreateBr(TryStoreBB);
1414
1415 Builder.SetInsertPoint(TryStoreBB);
1416 PHINode *LoadedTryStore =
1417 Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
1418 LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
1419 Value *NewValueInsert =
1420 insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
1421 Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
1422 PMV.AlignedAddr, MemOpOrder);
1423 StoreSuccess = Builder.CreateICmpEQ(
1424 StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
1425 BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1426 Builder.CreateCondBr(StoreSuccess, SuccessBB,
1427 CI->isWeak() ? FailureBB : RetryBB);
1428
1429 Builder.SetInsertPoint(ReleasedLoadBB);
1430 Value *SecondLoad;
1431 if (HasReleasedLoadBB) {
1432 SecondLoad =
1433 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1434 Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
1435 ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
1436 CI->getCompareOperand(), "should_store");
1437
1438 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1439 // jump straight past that fence instruction (if it exists).
1440 Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
1441 // Update PHI node in TryStoreBB.
1442 LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
1443 } else
1444 Builder.CreateUnreachable();
1445
1446 // Make sure later instructions don't get reordered with a fence if
1447 // necessary.
1448 Builder.SetInsertPoint(SuccessBB);
1449 if (ShouldInsertFencesForAtomic ||
1450 TLI->shouldInsertTrailingFenceForAtomicStore(CI))
1451 TLI->emitTrailingFence(Builder, CI, SuccessOrder);
1452 Builder.CreateBr(ExitBB);
1453
1454 Builder.SetInsertPoint(NoStoreBB);
1455 PHINode *LoadedNoStore =
1456 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
1457 LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
1458 if (HasReleasedLoadBB)
1459 LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);
1460
1461 // In the failing case, where we don't execute the store-conditional, the
1462 // target might want to balance out the load-linked with a dedicated
1463 // instruction (e.g., on ARM, clearing the exclusive monitor).
1464 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
1465 Builder.CreateBr(FailureBB);
1466
1467 Builder.SetInsertPoint(FailureBB);
1468 PHINode *LoadedFailure =
1469 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
1470 LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
1471 if (CI->isWeak())
1472 LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
1473 if (ShouldInsertFencesForAtomic)
1474 TLI->emitTrailingFence(Builder, CI, FailureOrder);
1475 Builder.CreateBr(ExitBB);
1476
1477 // Finally, we have control-flow based knowledge of whether the cmpxchg
1478 // succeeded or not. We expose this to later passes by converting any
1479 // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
1480 // PHI.
1481 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1482 PHINode *LoadedExit =
1483 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
1484 LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
1485 LoadedExit->addIncoming(LoadedFailure, FailureBB);
1486 PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
1487 Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
1488 Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
1489
1490 // This is the "exit value" from the cmpxchg expansion. It may be of
1491 // a type wider than the one in the cmpxchg instruction.
1492 Value *LoadedFull = LoadedExit;
1493
1494 Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
1495 Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);
1496
1497 // Look for any users of the cmpxchg that are just comparing the loaded value
1498 // against the desired one, and replace them with the CFG-derived version.
1500 for (auto *User : CI->users()) {
1501 ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
1502 if (!EV)
1503 continue;
1504
1505 assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
1506 "weird extraction from { iN, i1 }");
1507
1508 if (EV->getIndices()[0] == 0)
1509 EV->replaceAllUsesWith(Loaded);
1510 else
1512
1513 PrunedInsts.push_back(EV);
1514 }
1515
1516 // We can remove the instructions now we're no longer iterating through them.
1517 for (auto *EV : PrunedInsts)
1518 EV->eraseFromParent();
1519
1520 if (!CI->use_empty()) {
1521 // Some use of the full struct return that we don't understand has happened,
1522 // so we've got to reconstruct it properly.
1523 Value *Res;
1524 Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0);
1525 Res = Builder.CreateInsertValue(Res, Success, 1);
1526
1527 CI->replaceAllUsesWith(Res);
1528 }
1529
1530 CI->eraseFromParent();
1531 return true;
1532}
1533
1534bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
1535 auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
1536 if (!C)
1537 return false;
1538
1540 switch (Op) {
1541 case AtomicRMWInst::Add:
1542 case AtomicRMWInst::Sub:
1543 case AtomicRMWInst::Or:
1544 case AtomicRMWInst::Xor:
1545 return C->isZero();
1546 case AtomicRMWInst::And:
1547 return C->isMinusOne();
1548 // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
1549 default:
1550 return false;
1551 }
1552}
1553
1554bool AtomicExpandImpl::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
1555 if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1556 tryExpandAtomicLoad(ResultingLoad);
1557 return true;
1558 }
1559 return false;
1560}
1561
1562Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
1563 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1564 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
1565 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
1566 CreateCmpXchgInstFun CreateCmpXchg) {
1567 LLVMContext &Ctx = Builder.getContext();
1568 BasicBlock *BB = Builder.GetInsertBlock();
1569 Function *F = BB->getParent();
1570
1571 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1572 //
1573 // The standard expansion we produce is:
1574 // [...]
1575 // %init_loaded = load atomic iN* %addr
1576 // br label %loop
1577 // loop:
1578 // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1579 // %new = some_op iN %loaded, %incr
1580 // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1581 // %new_loaded = extractvalue { iN, i1 } %pair, 0
1582 // %success = extractvalue { iN, i1 } %pair, 1
1583 // br i1 %success, label %atomicrmw.end, label %loop
1584 // atomicrmw.end:
1585 // [...]
1586 BasicBlock *ExitBB =
1587 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1588 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1589
1590 // The split call above "helpfully" added a branch at the end of BB (to the
1591 // wrong place), but we want a load. It's easiest to just remove
1592 // the branch entirely.
1593 std::prev(BB->end())->eraseFromParent();
1594 Builder.SetInsertPoint(BB);
1595 LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
1596 Builder.CreateBr(LoopBB);
1597
1598 // Start the main loop block now that we've taken care of the preliminaries.
1599 Builder.SetInsertPoint(LoopBB);
1600 PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
1601 Loaded->addIncoming(InitLoaded, BB);
1602
1603 Value *NewVal = PerformOp(Builder, Loaded);
1604
1605 Value *NewLoaded = nullptr;
1606 Value *Success = nullptr;
1607
1608 CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
1609 MemOpOrder == AtomicOrdering::Unordered
1611 : MemOpOrder,
1612 SSID, Success, NewLoaded);
1613 assert(Success && NewLoaded);
1614
1615 Loaded->addIncoming(NewLoaded, LoopBB);
1616
1617 Builder.CreateCondBr(Success, ExitBB, LoopBB);
1618
1619 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1620 return NewLoaded;
1621}
1622
1623bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1624 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1625 unsigned ValueSize = getAtomicOpSize(CI);
1626
1627 switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1628 default:
1629 llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
1631 if (ValueSize < MinCASSize)
1632 return expandPartwordCmpXchg(CI);
1633 return false;
1635 return expandAtomicCmpXchg(CI);
1636 }
1638 expandAtomicCmpXchgToMaskedIntrinsic(CI);
1639 return true;
1641 return lowerAtomicCmpXchgInst(CI);
1642 }
1643}
1644
1645// Note: This function is exposed externally by AtomicExpandUtils.h
1647 CreateCmpXchgInstFun CreateCmpXchg) {
1648 ReplacementIRBuilder Builder(AI, AI->getDataLayout());
1649 Builder.setIsFPConstrained(
1650 AI->getFunction()->hasFnAttribute(Attribute::StrictFP));
1651
1652 // FIXME: If FP exceptions are observable, we should force them off for the
1653 // loop for the FP atomics.
1654 Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
1655 Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
1656 AI->getOrdering(), AI->getSyncScopeID(),
1657 [&](IRBuilderBase &Builder, Value *Loaded) {
1658 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
1659 AI->getValOperand());
1660 },
1661 CreateCmpXchg);
1662
1663 AI->replaceAllUsesWith(Loaded);
1664 AI->eraseFromParent();
1665 return true;
1666}
1667
1668// In order to use one of the sized library calls such as
1669// __atomic_fetch_add_4, the alignment must be sufficient, the size
1670// must be one of the potentially-specialized sizes, and the value
1671// type must actually exist in C on the target (otherwise, the
1672// function wouldn't actually be defined.)
1673static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
1674 const DataLayout &DL) {
1675 // TODO: "LargestSize" is an approximation for "largest type that
1676 // you can express in C". It seems to be the case that int128 is
1677 // supported on all 64-bit platforms, otherwise only up to 64-bit
1678 // integers are supported. If we get this wrong, then we'll try to
1679 // call a sized libcall that doesn't actually exist. There should
1680 // really be some more reliable way in LLVM of determining integer
1681 // sizes which are valid in the target's C ABI...
1682 unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1683 return Alignment >= Size &&
1684 (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1685 Size <= LargestSize;
1686}
1687
1688void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) {
1689 static const RTLIB::Libcall Libcalls[6] = {
1690 RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1691 RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1692 unsigned Size = getAtomicOpSize(I);
1693
1694 bool expanded = expandAtomicOpToLibcall(
1695 I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
1696 I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1697 if (!expanded)
1698 report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load");
1699}
1700
1701void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
1702 static const RTLIB::Libcall Libcalls[6] = {
1703 RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1704 RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1705 unsigned Size = getAtomicOpSize(I);
1706
1707 bool expanded = expandAtomicOpToLibcall(
1708 I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
1709 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1710 if (!expanded)
1711 report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store");
1712}
1713
1714void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
1715 static const RTLIB::Libcall Libcalls[6] = {
1716 RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1717 RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1718 RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1719 unsigned Size = getAtomicOpSize(I);
1720
1721 bool expanded = expandAtomicOpToLibcall(
1722 I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
1723 I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
1724 Libcalls);
1725 if (!expanded)
1726 report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS");
1727}
1728
1730 static const RTLIB::Libcall LibcallsXchg[6] = {
1731 RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1732 RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1733 RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1734 static const RTLIB::Libcall LibcallsAdd[6] = {
1735 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1736 RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1737 RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1738 static const RTLIB::Libcall LibcallsSub[6] = {
1739 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1740 RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1741 RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1742 static const RTLIB::Libcall LibcallsAnd[6] = {
1743 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1744 RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1745 RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1746 static const RTLIB::Libcall LibcallsOr[6] = {
1747 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1748 RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1749 RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1750 static const RTLIB::Libcall LibcallsXor[6] = {
1751 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1752 RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1753 RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1754 static const RTLIB::Libcall LibcallsNand[6] = {
1755 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1756 RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1757 RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1758
1759 switch (Op) {
1761 llvm_unreachable("Should not have BAD_BINOP.");
1763 return ArrayRef(LibcallsXchg);
1764 case AtomicRMWInst::Add:
1765 return ArrayRef(LibcallsAdd);
1766 case AtomicRMWInst::Sub:
1767 return ArrayRef(LibcallsSub);
1768 case AtomicRMWInst::And:
1769 return ArrayRef(LibcallsAnd);
1770 case AtomicRMWInst::Or:
1771 return ArrayRef(LibcallsOr);
1772 case AtomicRMWInst::Xor:
1773 return ArrayRef(LibcallsXor);
1775 return ArrayRef(LibcallsNand);
1776 case AtomicRMWInst::Max:
1777 case AtomicRMWInst::Min:
1786 // No atomic libcalls are available for max/min/umax/umin.
1787 return {};
1788 }
1789 llvm_unreachable("Unexpected AtomicRMW operation.");
1790}
1791
1792void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
1793 ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
1794
1795 unsigned Size = getAtomicOpSize(I);
1796
1797 bool Success = false;
1798 if (!Libcalls.empty())
1799 Success = expandAtomicOpToLibcall(
1800 I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
1801 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1802
1803 // The expansion failed: either there were no libcalls at all for
1804 // the operation (min/max), or there were only size-specialized
1805 // libcalls (add/sub/etc) and we needed a generic. So, expand to a
1806 // CAS libcall, via a CAS loop, instead.
1807 if (!Success) {
1809 I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
1810 Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
1811 SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) {
1812 // Create the CAS instruction normally...
1813 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
1814 Addr, Loaded, NewVal, Alignment, MemOpOrder,
1816 Success = Builder.CreateExtractValue(Pair, 1, "success");
1817 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
1818
1819 // ...and then expand the CAS into a libcall.
1820 expandAtomicCASToLibcall(Pair);
1821 });
1822 }
1823}
1824
1825// A helper routine for the above expandAtomic*ToLibcall functions.
1826//
1827// 'Libcalls' contains an array of enum values for the particular
1828// ATOMIC libcalls to be emitted. All of the other arguments besides
1829// 'I' are extracted from the Instruction subclass by the
1830// caller. Depending on the particular call, some will be null.
1831bool AtomicExpandImpl::expandAtomicOpToLibcall(
1832 Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
1833 Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
1834 AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
1835 assert(Libcalls.size() == 6);
1836
1837 LLVMContext &Ctx = I->getContext();
1838 Module *M = I->getModule();
1839 const DataLayout &DL = M->getDataLayout();
1840 IRBuilder<> Builder(I);
1841 IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
1842
1843 bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
1844 Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
1845
1846 const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
1847
1848 // TODO: the "order" argument type is "int", not int32. So
1849 // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
1850 ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
1851 assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
1852 Constant *OrderingVal =
1853 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
1854 Constant *Ordering2Val = nullptr;
1855 if (CASExpected) {
1856 assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
1857 Ordering2Val =
1858 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
1859 }
1860 bool HasResult = I->getType() != Type::getVoidTy(Ctx);
1861
1862 RTLIB::Libcall RTLibType;
1863 if (UseSizedLibcall) {
1864 switch (Size) {
1865 case 1:
1866 RTLibType = Libcalls[1];
1867 break;
1868 case 2:
1869 RTLibType = Libcalls[2];
1870 break;
1871 case 4:
1872 RTLibType = Libcalls[3];
1873 break;
1874 case 8:
1875 RTLibType = Libcalls[4];
1876 break;
1877 case 16:
1878 RTLibType = Libcalls[5];
1879 break;
1880 }
1881 } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
1882 RTLibType = Libcalls[0];
1883 } else {
1884 // Can't use sized function, and there's no generic for this
1885 // operation, so give up.
1886 return false;
1887 }
1888
1889 if (!TLI->getLibcallName(RTLibType)) {
1890 // This target does not implement the requested atomic libcall so give up.
1891 return false;
1892 }
1893
1894 // Build up the function call. There's two kinds. First, the sized
1895 // variants. These calls are going to be one of the following (with
1896 // N=1,2,4,8,16):
1897 // iN __atomic_load_N(iN *ptr, int ordering)
1898 // void __atomic_store_N(iN *ptr, iN val, int ordering)
1899 // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
1900 // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
1901 // int success_order, int failure_order)
1902 //
1903 // Note that these functions can be used for non-integer atomic
1904 // operations, the values just need to be bitcast to integers on the
1905 // way in and out.
1906 //
1907 // And, then, the generic variants. They look like the following:
1908 // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
1909 // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
1910 // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
1911 // int ordering)
1912 // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
1913 // void *desired, int success_order,
1914 // int failure_order)
1915 //
1916 // The different signatures are built up depending on the
1917 // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
1918 // variables.
1919
1920 AllocaInst *AllocaCASExpected = nullptr;
1921 AllocaInst *AllocaValue = nullptr;
1922 AllocaInst *AllocaResult = nullptr;
1923
1924 Type *ResultTy;
1926 AttributeList Attr;
1927
1928 // 'size' argument.
1929 if (!UseSizedLibcall) {
1930 // Note, getIntPtrType is assumed equivalent to size_t.
1931 Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
1932 }
1933
1934 // 'ptr' argument.
1935 // note: This assumes all address spaces share a common libfunc
1936 // implementation and that addresses are convertable. For systems without
1937 // that property, we'd need to extend this mechanism to support AS-specific
1938 // families of atomic intrinsics.
1939 Value *PtrVal = PointerOperand;
1940 PtrVal = Builder.CreateAddrSpaceCast(PtrVal, PointerType::getUnqual(Ctx));
1941 Args.push_back(PtrVal);
1942
1943 // 'expected' argument, if present.
1944 if (CASExpected) {
1945 AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
1946 AllocaCASExpected->setAlignment(AllocaAlignment);
1947 Builder.CreateLifetimeStart(AllocaCASExpected, SizeVal64);
1948 Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
1949 Args.push_back(AllocaCASExpected);
1950 }
1951
1952 // 'val' argument ('desired' for cas), if present.
1953 if (ValueOperand) {
1954 if (UseSizedLibcall) {
1955 Value *IntValue =
1956 Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
1957 Args.push_back(IntValue);
1958 } else {
1959 AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
1960 AllocaValue->setAlignment(AllocaAlignment);
1961 Builder.CreateLifetimeStart(AllocaValue, SizeVal64);
1962 Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
1963 Args.push_back(AllocaValue);
1964 }
1965 }
1966
1967 // 'ret' argument.
1968 if (!CASExpected && HasResult && !UseSizedLibcall) {
1969 AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
1970 AllocaResult->setAlignment(AllocaAlignment);
1971 Builder.CreateLifetimeStart(AllocaResult, SizeVal64);
1972 Args.push_back(AllocaResult);
1973 }
1974
1975 // 'ordering' ('success_order' for cas) argument.
1976 Args.push_back(OrderingVal);
1977
1978 // 'failure_order' argument, if present.
1979 if (Ordering2Val)
1980 Args.push_back(Ordering2Val);
1981
1982 // Now, the return type.
1983 if (CASExpected) {
1984 ResultTy = Type::getInt1Ty(Ctx);
1985 Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
1986 } else if (HasResult && UseSizedLibcall)
1987 ResultTy = SizedIntTy;
1988 else
1989 ResultTy = Type::getVoidTy(Ctx);
1990
1991 // Done with setting up arguments and return types, create the call:
1993 for (Value *Arg : Args)
1994 ArgTys.push_back(Arg->getType());
1995 FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
1996 FunctionCallee LibcallFn =
1997 M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
1998 CallInst *Call = Builder.CreateCall(LibcallFn, Args);
1999 Call->setAttributes(Attr);
2000 Value *Result = Call;
2001
2002 // And then, extract the results...
2003 if (ValueOperand && !UseSizedLibcall)
2004 Builder.CreateLifetimeEnd(AllocaValue, SizeVal64);
2005
2006 if (CASExpected) {
2007 // The final result from the CAS is {load of 'expected' alloca, bool result
2008 // from call}
2009 Type *FinalResultTy = I->getType();
2010 Value *V = PoisonValue::get(FinalResultTy);
2011 Value *ExpectedOut = Builder.CreateAlignedLoad(
2012 CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
2013 Builder.CreateLifetimeEnd(AllocaCASExpected, SizeVal64);
2014 V = Builder.CreateInsertValue(V, ExpectedOut, 0);
2015 V = Builder.CreateInsertValue(V, Result, 1);
2016 I->replaceAllUsesWith(V);
2017 } else if (HasResult) {
2018 Value *V;
2019 if (UseSizedLibcall)
2020 V = Builder.CreateBitOrPointerCast(Result, I->getType());
2021 else {
2022 V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
2023 AllocaAlignment);
2024 Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);
2025 }
2026 I->replaceAllUsesWith(V);
2027 }
2028 I->eraseFromParent();
2029 return true;
2030}
#define Success
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Expand Atomic instructions
static Value * performMaskedAtomicOp(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, Value *Loaded, Value *Shifted_Inc, Value *Inc, const PartwordMaskValues &PMV)
Emit IR to implement a masked version of a given atomicrmw operation.
static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder, Instruction *I, Type *ValueType, Value *Addr, Align AddrAlign, unsigned MinWordSize)
This is a helper function which builds instructions to provide values necessary for partword atomic o...
static bool canUseSizedAtomicCall(unsigned Size, Align Alignment, const DataLayout &DL)
static Value * extractMaskedValue(IRBuilderBase &Builder, Value *WideWord, const PartwordMaskValues &PMV)
Expand Atomic static false unsigned getAtomicOpSize(LoadInst *LI)
#define DEBUG_TYPE
static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I)
static Value * insertMaskedValue(IRBuilderBase &Builder, Value *WideWord, Value *Updated, const PartwordMaskValues &PMV)
static void copyMetadataForAtomic(Instruction &Dest, const Instruction &Source)
Copy metadata that's safe to preserve when widening atomics.
static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr, Value *Loaded, Value *NewVal, Align AddrAlign, AtomicOrdering MemOpOrder, SyncScope::ID SSID, Value *&Success, Value *&NewLoaded)
static ArrayRef< RTLIB::Libcall > GetRMWLibcall(AtomicRMWInst::BinOp Op)
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:203
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
uint64_t Size
#define DEBUG_TYPE
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
Module.h This file contains the declarations for the Module class.
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition: PassSupport.h:55
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:57
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:52
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
an instruction to allocate memory on the stack
Definition: Instructions.h:61
void setAlignment(Align Align)
Definition: Instructions.h:126
A container for analyses that lazily runs them and caches their results.
Definition: PassManager.h:253
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:495
AtomicOrdering getMergedOrdering() const
Returns a single ordering which is at least as strong as both the success and failure orderings for t...
Definition: Instructions.h:599
void setWeak(bool IsWeak)
Definition: Instructions.h:556
bool isVolatile() const
Return true if this is a cmpxchg from a volatile memory location.
Definition: Instructions.h:547
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
Definition: Instructions.h:586
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
Definition: Instructions.h:644
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:536
bool isWeak() const
Return true if this cmpxchg may spuriously fail.
Definition: Instructions.h:554
void setVolatile(bool V)
Specify whether this is a volatile cmpxchg.
Definition: Instructions.h:551
AtomicOrdering getSuccessOrdering() const
Returns the success ordering constraint of this cmpxchg instruction.
Definition: Instructions.h:574
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this cmpxchg instruction.
Definition: Instructions.h:612
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:696
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:809
bool isVolatile() const
Return true if this is a RMW on a volatile memory location.
Definition: Instructions.h:819
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:708
@ Add
*p = old + v
Definition: Instructions.h:712
@ FAdd
*p = old + v
Definition: Instructions.h:733
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:726
@ Or
*p = old | v
Definition: Instructions.h:720
@ Sub
*p = old - v
Definition: Instructions.h:714
@ And
*p = old & v
Definition: Instructions.h:716
@ Xor
*p = old ^ v
Definition: Instructions.h:722
@ FSub
*p = old - v
Definition: Instructions.h:736
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:748
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:724
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:730
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
Definition: Instructions.h:744
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:728
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
Definition: Instructions.h:740
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:752
@ Nand
*p = ~(old & v)
Definition: Instructions.h:718
Value * getPointerOperand()
Definition: Instructions.h:852
BinOp getOperation() const
Definition: Instructions.h:787
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
Definition: Instructions.h:843
Value * getValOperand()
Definition: Instructions.h:856
static StringRef getOperationName(BinOp Op)
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
Definition: Instructions.h:829
AttributeList addRetAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add a return value attribute to the list.
Definition: Attributes.h:584
LLVM Basic Block Representation.
Definition: BasicBlock.h:61
iterator end()
Definition: BasicBlock.h:461
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:448
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:212
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:577
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
This class represents a function call, abstracting a target machine's calling convention.
This is the shared class of boolean and integer constants.
Definition: Constants.h:81
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:850
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:857
This is an important base class in LLVM.
Definition: Constant.h:42
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:370
This class represents an Operation in the Expression.
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:110
This instruction extracts a struct member or array element value from an aggregate value.
ArrayRef< unsigned > getIndices() const
unsigned getNumIndices() const
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:168
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
static FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:743
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
AtomicCmpXchgInst * CreateAtomicCmpXchg(Value *Ptr, Value *Cmp, Value *New, MaybeAlign Align, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1846
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2531
IntegerType * getIntNTy(unsigned N)
Fetch the type representing an N-bit integer.
Definition: IRBuilder.h:536
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition: IRBuilder.h:1812
UnreachableInst * CreateUnreachable()
Definition: IRBuilder.h:1268
CallInst * CreateLifetimeStart(Value *Ptr, ConstantInt *Size=nullptr)
Create a lifetime.start intrinsic.
Definition: IRBuilder.cpp:482
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition: IRBuilder.h:2524
CallInst * CreateIntrinsic(Intrinsic::ID ID, ArrayRef< Type * > Types, ArrayRef< Value * > Args, Instruction *FMFSource=nullptr, const Twine &Name="")
Create a call to intrinsic ID with Args, mangled using Types.
Definition: IRBuilder.cpp:933
BasicBlock::iterator GetInsertPoint() const
Definition: IRBuilder.h:172
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2130
Value * CreateLShr(Value *LHS, Value *RHS, const Twine &Name="", bool isExact=false)
Definition: IRBuilder.h:1442
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:171
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2253
void CollectMetadataToCopy(Instruction *Src, ArrayRef< unsigned > MetadataKinds)
Collect metadata with IDs MetadataKinds from Src which should be added to all created instructions.
Definition: IRBuilder.h:230
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2213
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition: IRBuilder.h:2405
Value * CreateNot(Value *V, const Twine &Name="")
Definition: IRBuilder.h:1754
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:2249
void setIsFPConstrained(bool IsCon)
Enable/Disable use of constrained floating point math.
Definition: IRBuilder.h:314
Value * CreateBitCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2135
BranchInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition: IRBuilder.h:1125
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition: IRBuilder.h:1795
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition: IRBuilder.h:1421
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition: IRBuilder.h:2029
LLVMContext & getContext() const
Definition: IRBuilder.h:173
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1480
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2125
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition: IRBuilder.h:1859
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition: IRBuilder.h:2015
CallInst * CreateLifetimeEnd(Value *Ptr, ConstantInt *Size=nullptr)
Create a lifetime.end intrinsic.
Definition: IRBuilder.cpp:497
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1502
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition: IRBuilder.h:1119
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2169
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:177
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition: IRBuilder.h:1831
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args=std::nullopt, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition: IRBuilder.h:2420
Value * CreateXor(Value *LHS, Value *RHS, const Twine &Name="")
Definition: IRBuilder.h:1524
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Definition: IRBuilder.h:2140
Provides an 'InsertHelper' that calls a user-provided callback after performing the default insertion...
Definition: IRBuilder.h:74
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2674
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Definition: Instruction.cpp:66
InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:92
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:70
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
Definition: Metadata.cpp:1642
const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Definition: Instruction.cpp:74
Class to represent integer types.
Definition: DerivedTypes.h:40
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:266
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
unsigned getMDKindID(StringRef Name) const
getMDKindID - Return a unique non-zero ID for the specified metadata kind.
void getSyncScopeNames(SmallVectorImpl< StringRef > &SSNs) const
getSyncScopeNames - Populates client supplied SmallVector with synchronization scope names registered...
An instruction for reading from memory.
Definition: Instructions.h:174
Value * getPointerOperand()
Definition: Instructions.h:253
bool isVolatile() const
Return true if this is a load from a volatile memory location.
Definition: Instructions.h:203
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Definition: Instructions.h:239
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
Definition: Instructions.h:218
void setVolatile(bool V)
Specify whether this is a volatile load or not.
Definition: Instructions.h:206
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Definition: Instructions.h:228
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:209
Metadata node.
Definition: Metadata.h:1069
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:296
The optimization diagnostic interface.
Diagnostic information for applied optimization remarks.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:662
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1852
A set of analyses that are preserved following a run of a transformation pass.
Definition: Analysis.h:111
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition: Analysis.h:114
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: Analysis.h:117
bool empty() const
Definition: SmallVector.h:95
void push_back(const T &Elt)
Definition: SmallVector.h:427
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
An instruction for storing to memory.
Definition: Instructions.h:290
void setVolatile(bool V)
Specify whether this is a volatile store or not.
Definition: Instructions.h:324
void setAlignment(Align Align)
Definition: Instructions.h:333
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
Definition: Instructions.h:360
unsigned getMaxAtomicSizeInBitsSupported() const
Returns the maximum atomic operation size (in bits) supported by the backend.
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:77
Target-Independent Code Generator Pass Configuration Options.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isVectorTy() const
True if this is an instance of VectorType.
Definition: Type.h:261
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:251
static IntegerType * getInt1Ty(LLVMContext &C)
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static Type * getVoidTy(LLVMContext &C)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:184
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:534
iterator_range< user_iterator > users()
Definition: Value.h:421
bool use_empty() const
Definition: Value.h:344
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:1075
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition: ilist_node.h:32
self_iterator getIterator()
Definition: ilist_node.h:132
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
void initializeAtomicExpandLegacyPass(PassRegistry &)
bool canInstructionHaveMMRAs(const Instruction &I)
bool isReleaseOrStronger(AtomicOrdering AO)
AtomicOrderingCABI toCABI(AtomicOrdering AO)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
Value * buildAtomicRMWValue(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, Value *Loaded, Value *Val)
Emit IR to implement the given atomicrmw operation on values in registers, returning the new value.
Definition: LowerAtomic.cpp:41
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg)
Expand an atomic RMW instruction into a loop utilizing cmpxchg.
@ Or
Bitwise or logical OR of integers.
@ And
Bitwise or logical AND of integers.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:293
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
bool lowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI)
Convert the given Cmpxchg into primitive load and compare.
Definition: LowerAtomic.cpp:22
bool lowerAtomicRMWInst(AtomicRMWInst *RMWI)
Convert the given RMWI into primitive load and stores, assuming that doing so is legal.
FunctionPass * createAtomicExpandLegacyPass()
AtomicExpandPass - At IR level this pass replaces atomic instructions with __atomic_* library calls,...
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
char & AtomicExpandID
AtomicExpandID – Lowers atomic operations in terms of either cmpxchg load-linked/store-conditional lo...
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:34
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition: ValueTypes.h:397