1//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass (at IR level) to replace atomic instructions with
10// __atomic_* library calls, or target-specific instructions which implement
11// the same semantics in a way that better fits the target backend. This can
12// include the use of (intrinsic-based) load-linked/store-conditional loops,
13// AtomicCmpXchg, or type coercions.
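//
// For example, on a target that requests the CmpXChg expansion for
//   %old = atomicrmw add ptr %p, i32 %v seq_cst
// the atomicrmw is rewritten, roughly (illustrative sketch only; the exact
// IR depends on the target's hooks), into a compare-exchange loop:
//   %init = load i32, ptr %p
// atomicrmw.start:
//   %loaded = phi i32 [ %init, %entry ], [ %new.loaded, %atomicrmw.start ]
//   %new = add i32 %loaded, %v
//   %pair = cmpxchg ptr %p, i32 %loaded, i32 %new seq_cst seq_cst
//   %new.loaded = extractvalue { i32, i1 } %pair, 0
//   %success = extractvalue { i32, i1 } %pair, 1
//   br i1 %success, label %atomicrmw.end, label %atomicrmw.start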
14//
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/ArrayRef.h"
29#include "llvm/IR/Attributes.h"
30#include "llvm/IR/BasicBlock.h"
31#include "llvm/IR/Constant.h"
32#include "llvm/IR/Constants.h"
33#include "llvm/IR/DataLayout.h"
35#include "llvm/IR/Function.h"
36#include "llvm/IR/IRBuilder.h"
38#include "llvm/IR/Instruction.h"
40#include "llvm/IR/MDBuilder.h"
42#include "llvm/IR/Module.h"
43#include "llvm/IR/Type.h"
44#include "llvm/IR/User.h"
45#include "llvm/IR/Value.h"
47#include "llvm/Pass.h"
50#include "llvm/Support/Debug.h"
55#include <cassert>
56#include <cstdint>
57#include <iterator>
58
59using namespace llvm;
60
61#define DEBUG_TYPE "atomic-expand"
62
63namespace {
64
65class AtomicExpandImpl {
66 const TargetLowering *TLI = nullptr;
67 const DataLayout *DL = nullptr;
68
69private:
70 bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
71 IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
72 LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
73 bool tryExpandAtomicLoad(LoadInst *LI);
74 bool expandAtomicLoadToLL(LoadInst *LI);
75 bool expandAtomicLoadToCmpXchg(LoadInst *LI);
76 StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
77 bool tryExpandAtomicStore(StoreInst *SI);
78 void expandAtomicStore(StoreInst *SI);
79 bool tryExpandAtomicRMW(AtomicRMWInst *AI);
80 AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
81 Value *
82 insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
83 Align AddrAlign, AtomicOrdering MemOpOrder,
84 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
85 void expandAtomicOpToLLSC(
86 Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
87 AtomicOrdering MemOpOrder,
88 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
 89 void expandPartwordAtomicRMW(
 90 AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind);
91 AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
92 bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
93 void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
94 void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
95
96 AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
97 static Value *insertRMWCmpXchgLoop(
98 IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
99 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
100 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
101 CreateCmpXchgInstFun CreateCmpXchg);
102 bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
103
104 bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
105 bool isIdempotentRMW(AtomicRMWInst *RMWI);
106 bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
107
108 bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
109 Value *PointerOperand, Value *ValueOperand,
110 Value *CASExpected, AtomicOrdering Ordering,
111 AtomicOrdering Ordering2,
112 ArrayRef<RTLIB::Libcall> Libcalls);
113 void expandAtomicLoadToLibcall(LoadInst *LI);
114 void expandAtomicStoreToLibcall(StoreInst *LI);
115 void expandAtomicRMWToLibcall(AtomicRMWInst *I);
116 void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
117
 118 friend bool
 119 llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
 120 CreateCmpXchgInstFun CreateCmpXchg);
121
122 bool processAtomicInstr(Instruction *I);
123
124public:
125 bool run(Function &F, const TargetMachine *TM);
126};
127
128class AtomicExpandLegacy : public FunctionPass {
129public:
130 static char ID; // Pass identification, replacement for typeid
131
 132 AtomicExpandLegacy() : FunctionPass(ID) {
 133 initializeAtomicExpandLegacyPass(*PassRegistry::getPassRegistry());
 134 }
135
136 bool runOnFunction(Function &F) override;
137};
138
139// IRBuilder to be used for replacement atomic instructions.
140struct ReplacementIRBuilder
141 : IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> {
142 MDNode *MMRAMD = nullptr;
143
144 // Preserves the DebugLoc from I, and preserves still valid metadata.
145 // Enable StrictFP builder mode when appropriate.
 146 explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
 147 : IRBuilder(I->getContext(), InstSimplifyFolder(DL),
 148 IRBuilderCallbackInserter(
 149 [this](Instruction *I) { addMMRAMD(I); })) {
 150 SetInsertPoint(I);
151 this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
152 if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
153 this->setIsFPConstrained(true);
154
155 MMRAMD = I->getMetadata(LLVMContext::MD_mmra);
156 }
157
 158 void addMMRAMD(Instruction *I) {
 159 if (canInstructionHaveMMRAs(*I))
 160 I->setMetadata(LLVMContext::MD_mmra, MMRAMD);
161 }
162};
163
164} // end anonymous namespace
165
166char AtomicExpandLegacy::ID = 0;
167
168char &llvm::AtomicExpandID = AtomicExpandLegacy::ID;
169
171 "Expand Atomic instructions", false, false)
173INITIALIZE_PASS_END(AtomicExpandLegacy, DEBUG_TYPE,
174 "Expand Atomic instructions", false, false)
175
176// Helper functions to retrieve the size of atomic instructions.
177static unsigned getAtomicOpSize(LoadInst *LI) {
178 const DataLayout &DL = LI->getDataLayout();
179 return DL.getTypeStoreSize(LI->getType());
180}
181
182static unsigned getAtomicOpSize(StoreInst *SI) {
183 const DataLayout &DL = SI->getDataLayout();
184 return DL.getTypeStoreSize(SI->getValueOperand()->getType());
185}
186
187static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
188 const DataLayout &DL = RMWI->getDataLayout();
189 return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
190}
191
192static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
193 const DataLayout &DL = CASI->getDataLayout();
194 return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
195}
196
197// Determine if a particular atomic operation has a supported size,
198// and is of appropriate alignment, to be passed through for target
199// lowering. (Versus turning into a __atomic libcall)
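// For instance (illustrative; the real limits come from the target's
// TargetLowering): a 16-byte atomic load that is only 8-byte aligned, or one
// wider than getMaxAtomicSizeInBitsSupported(), fails this check and is
// lowered to a generic libcall such as __atomic_load(16, %ptr, %ret, order).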
200template <typename Inst>
201static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
202 unsigned Size = getAtomicOpSize(I);
203 Align Alignment = I->getAlign();
 204 return Alignment >= Size &&
 205 Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
206}
207
208bool AtomicExpandImpl::processAtomicInstr(Instruction *I) {
209 auto *LI = dyn_cast<LoadInst>(I);
210 auto *SI = dyn_cast<StoreInst>(I);
211 auto *RMWI = dyn_cast<AtomicRMWInst>(I);
212 auto *CASI = dyn_cast<AtomicCmpXchgInst>(I);
213
214 bool MadeChange = false;
215
216 // If the Size/Alignment is not supported, replace with a libcall.
217 if (LI) {
218 if (!LI->isAtomic())
219 return false;
220
221 if (!atomicSizeSupported(TLI, LI)) {
222 expandAtomicLoadToLibcall(LI);
223 return true;
224 }
225
226 if (TLI->shouldCastAtomicLoadInIR(LI) ==
227 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
228 I = LI = convertAtomicLoadToIntegerType(LI);
229 MadeChange = true;
230 }
231 } else if (SI) {
232 if (!SI->isAtomic())
233 return false;
234
235 if (!atomicSizeSupported(TLI, SI)) {
236 expandAtomicStoreToLibcall(SI);
237 return true;
238 }
239
240 if (TLI->shouldCastAtomicStoreInIR(SI) ==
241 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
242 I = SI = convertAtomicStoreToIntegerType(SI);
243 MadeChange = true;
244 }
245 } else if (RMWI) {
246 if (!atomicSizeSupported(TLI, RMWI)) {
247 expandAtomicRMWToLibcall(RMWI);
248 return true;
249 }
250
251 if (TLI->shouldCastAtomicRMWIInIR(RMWI) ==
252 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
253 I = RMWI = convertAtomicXchgToIntegerType(RMWI);
254 MadeChange = true;
255 }
256 } else if (CASI) {
257 if (!atomicSizeSupported(TLI, CASI)) {
258 expandAtomicCASToLibcall(CASI);
259 return true;
260 }
261
262 // TODO: when we're ready to make the change at the IR level, we can
263 // extend convertCmpXchgToInteger for floating point too.
264 if (CASI->getCompareOperand()->getType()->isPointerTy()) {
265 // TODO: add a TLI hook to control this so that each target can
266 // convert to lowering the original type one at a time.
267 I = CASI = convertCmpXchgToIntegerType(CASI);
268 MadeChange = true;
269 }
270 } else
271 return false;
272
273 if (TLI->shouldInsertFencesForAtomic(I)) {
274 auto FenceOrdering = AtomicOrdering::Monotonic;
275 if (LI && isAcquireOrStronger(LI->getOrdering())) {
276 FenceOrdering = LI->getOrdering();
277 LI->setOrdering(AtomicOrdering::Monotonic);
278 } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
279 FenceOrdering = SI->getOrdering();
280 SI->setOrdering(AtomicOrdering::Monotonic);
281 } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
282 isAcquireOrStronger(RMWI->getOrdering()))) {
283 FenceOrdering = RMWI->getOrdering();
284 RMWI->setOrdering(AtomicOrdering::Monotonic);
285 } else if (CASI &&
286 TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
287 TargetLoweringBase::AtomicExpansionKind::None &&
288 (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
289 isAcquireOrStronger(CASI->getSuccessOrdering()) ||
290 isAcquireOrStronger(CASI->getFailureOrdering()))) {
291 // If a compare and swap is lowered to LL/SC, we can do smarter fence
292 // insertion, with a stronger one on the success path than on the
293 // failure path. As a result, fence insertion is directly done by
294 // expandAtomicCmpXchg in that case.
295 FenceOrdering = CASI->getMergedOrdering();
296 CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
297 CASI->setFailureOrdering(AtomicOrdering::Monotonic);
298 }
299
300 if (FenceOrdering != AtomicOrdering::Monotonic) {
301 MadeChange |= bracketInstWithFences(I, FenceOrdering);
302 }
303 } else if (I->hasAtomicStore() &&
304 TLI->shouldInsertTrailingFenceForAtomicStore(I)) {
305 auto FenceOrdering = AtomicOrdering::Monotonic;
306 if (SI)
307 FenceOrdering = SI->getOrdering();
308 else if (RMWI)
309 FenceOrdering = RMWI->getOrdering();
310 else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) !=
311 TargetLoweringBase::AtomicExpansionKind::LLSC)
312 // LLSC is handled in expandAtomicCmpXchg().
313 FenceOrdering = CASI->getSuccessOrdering();
314
315 IRBuilder Builder(I);
316 if (auto TrailingFence =
317 TLI->emitTrailingFence(Builder, I, FenceOrdering)) {
318 TrailingFence->moveAfter(I);
319 MadeChange = true;
320 }
321 }
322
323 if (LI)
324 MadeChange |= tryExpandAtomicLoad(LI);
325 else if (SI)
326 MadeChange |= tryExpandAtomicStore(SI);
327 else if (RMWI) {
328 // There are two different ways of expanding RMW instructions:
329 // - into a load if it is idempotent
330 // - into a Cmpxchg/LL-SC loop otherwise
331 // we try them in that order.
332
333 if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
334 MadeChange = true;
335
336 } else {
337 MadeChange |= tryExpandAtomicRMW(RMWI);
338 }
339 } else if (CASI)
340 MadeChange |= tryExpandAtomicCmpXchg(CASI);
341
342 return MadeChange;
343}
344
345bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM) {
346 const auto *Subtarget = TM->getSubtargetImpl(F);
347 if (!Subtarget->enableAtomicExpand())
348 return false;
349 TLI = Subtarget->getTargetLowering();
350 DL = &F.getDataLayout();
351
352 bool MadeChange = false;
353
 354 SmallVector<Instruction *, 1> AtomicInsts;
 355
356 // Changing control-flow while iterating through it is a bad idea, so gather a
357 // list of all atomic instructions before we start.
358 for (Instruction &I : instructions(F))
359 if (I.isAtomic() && !isa<FenceInst>(&I))
360 AtomicInsts.push_back(&I);
361
362 for (auto *I : AtomicInsts) {
363 if (processAtomicInstr(I))
364 MadeChange = true;
365 }
366
367 return MadeChange;
368}
369
370bool AtomicExpandLegacy::runOnFunction(Function &F) {
371
372 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
373 if (!TPC)
374 return false;
375 auto *TM = &TPC->getTM<TargetMachine>();
376 AtomicExpandImpl AE;
377 return AE.run(F, TM);
378}
379
380FunctionPass *llvm::createAtomicExpandLegacyPass() {
 381 return new AtomicExpandLegacy();
382}
383
384PreservedAnalyses AtomicExpandPass::run(Function &F,
 385 FunctionAnalysisManager &AM) {
 386 AtomicExpandImpl AE;
387
388 bool Changed = AE.run(F, TM);
389 if (!Changed)
390 return PreservedAnalyses::all();
391
 392 return PreservedAnalyses::none();
 393}
394
395bool AtomicExpandImpl::bracketInstWithFences(Instruction *I,
396 AtomicOrdering Order) {
397 ReplacementIRBuilder Builder(I, *DL);
398
399 auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
400
401 auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
402 // We have a guard here because not every atomic operation generates a
403 // trailing fence.
404 if (TrailingFence)
405 TrailingFence->moveAfter(I);
406
407 return (LeadingFence || TrailingFence);
408}
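// Illustrative effect of the bracketing (the target hooks decide which fences
// are actually emitted): a "load atomic i32, ptr %p seq_cst" that
// processAtomicInstr has already downgraded to monotonic typically ends up as
//   %v = load atomic i32, ptr %p monotonic, align 4
//   fence seq_cst
// with a leading fence emitted as well when the target asks for one.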
409
410/// Get the iX type with the same bitwidth as T.
411IntegerType *
412AtomicExpandImpl::getCorrespondingIntegerType(Type *T, const DataLayout &DL) {
413 EVT VT = TLI->getMemValueType(DL, T);
414 unsigned BitWidth = VT.getStoreSizeInBits();
415 assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
416 return IntegerType::get(T->getContext(), BitWidth);
417}
418
419/// Convert an atomic load of a non-integral type to an integer load of the
420/// equivalent bitwidth. See the function comment on
421/// convertAtomicStoreToIntegerType for background.
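/// Roughly (illustrative): "%v = load atomic float, ptr %p acquire, align 4"
/// becomes "%i = load atomic i32, ptr %p acquire, align 4" followed by
/// "%v = bitcast i32 %i to float".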
422LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) {
423 auto *M = LI->getModule();
424 Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());
425
426 ReplacementIRBuilder Builder(LI, *DL);
 427
 428 Value *Addr = LI->getPointerOperand();
 429
430 auto *NewLI = Builder.CreateLoad(NewTy, Addr);
431 NewLI->setAlignment(LI->getAlign());
432 NewLI->setVolatile(LI->isVolatile());
433 NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
434 LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
435
436 Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
437 LI->replaceAllUsesWith(NewVal);
438 LI->eraseFromParent();
439 return NewLI;
440}
441
442AtomicRMWInst *
443AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
445
446 auto *M = RMWI->getModule();
447 Type *NewTy =
448 getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());
449
450 ReplacementIRBuilder Builder(RMWI, *DL);
451
452 Value *Addr = RMWI->getPointerOperand();
453 Value *Val = RMWI->getValOperand();
454 Value *NewVal = Val->getType()->isPointerTy()
455 ? Builder.CreatePtrToInt(Val, NewTy)
456 : Builder.CreateBitCast(Val, NewTy);
457
458 auto *NewRMWI = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Addr, NewVal,
459 RMWI->getAlign(), RMWI->getOrdering(),
460 RMWI->getSyncScopeID());
461 NewRMWI->setVolatile(RMWI->isVolatile());
462 LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
463
464 Value *NewRVal = RMWI->getType()->isPointerTy()
465 ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
466 : Builder.CreateBitCast(NewRMWI, RMWI->getType());
467 RMWI->replaceAllUsesWith(NewRVal);
468 RMWI->eraseFromParent();
469 return NewRMWI;
470}
471
472bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
 473 switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
 474 case TargetLoweringBase::AtomicExpansionKind::None:
 475 return false;
 476 case TargetLoweringBase::AtomicExpansionKind::LLSC:
 477 expandAtomicOpToLLSC(
 478 LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
 479 LI->getOrdering(),
 480 [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
 481 return true;
 482 case TargetLoweringBase::AtomicExpansionKind::LLOnly:
 483 return expandAtomicLoadToLL(LI);
 484 case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
 485 return expandAtomicLoadToCmpXchg(LI);
 486 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
 487 LI->setAtomic(AtomicOrdering::NotAtomic);
 488 return true;
489 default:
490 llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
491 }
492}
493
494bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
 495 switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
 496 case TargetLoweringBase::AtomicExpansionKind::None:
 497 return false;
 498 case TargetLoweringBase::AtomicExpansionKind::Expand:
 499 expandAtomicStore(SI);
 500 return true;
 501 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
 502 SI->setAtomic(AtomicOrdering::NotAtomic);
 503 return true;
504 default:
505 llvm_unreachable("Unhandled case in tryExpandAtomicStore");
506 }
507}
508
509bool AtomicExpandImpl::expandAtomicLoadToLL(LoadInst *LI) {
510 ReplacementIRBuilder Builder(LI, *DL);
511
512 // On some architectures, load-linked instructions are atomic for larger
513 // sizes than normal loads. For example, the only 64-bit load guaranteed
514 // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
515 Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
516 LI->getPointerOperand(), LI->getOrdering());
517 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
518
519 LI->replaceAllUsesWith(Val);
520 LI->eraseFromParent();
521
522 return true;
523}
524
525bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
526 ReplacementIRBuilder Builder(LI, *DL);
527 AtomicOrdering Order = LI->getOrdering();
 528 if (Order == AtomicOrdering::Unordered)
 529 Order = AtomicOrdering::Monotonic;
 530
 531 Value *Addr = LI->getPointerOperand();
 532 Type *Ty = LI->getType();
 533 Constant *DummyVal = Constant::getNullValue(Ty);
 534
 535 Value *Pair = Builder.CreateAtomicCmpXchg(
 536 Addr, DummyVal, DummyVal, LI->getAlign(), Order,
 537 AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
538 Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
539
540 LI->replaceAllUsesWith(Loaded);
541 LI->eraseFromParent();
542
543 return true;
544}
545
546/// Convert an atomic store of a non-integral type to an integer store of the
547/// equivalent bitwidth. We used to not support floating point or vector
548/// atomics in the IR at all. The backends learned to deal with the bitcast
549/// idiom because that was the only way of expressing the notion of an atomic
550/// float or vector store. The long term plan is to teach each backend to
551/// instruction select from the original atomic store, but as a migration
552/// mechanism, we convert back to the old format which the backends understand.
553/// Each backend will need individual work to recognize the new format.
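/// Roughly (illustrative): "store atomic float %f, ptr %p release, align 4"
/// becomes "%i = bitcast float %f to i32" followed by
/// "store atomic i32 %i, ptr %p release, align 4".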
554StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
555 ReplacementIRBuilder Builder(SI, *DL);
556 auto *M = SI->getModule();
557 Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
558 M->getDataLayout());
559 Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
560
561 Value *Addr = SI->getPointerOperand();
562
563 StoreInst *NewSI = Builder.CreateStore(NewVal, Addr);
564 NewSI->setAlignment(SI->getAlign());
565 NewSI->setVolatile(SI->isVolatile());
566 NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
567 LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
568 SI->eraseFromParent();
569 return NewSI;
570}
571
572void AtomicExpandImpl::expandAtomicStore(StoreInst *SI) {
573 // This function is only called on atomic stores that are too large to be
574 // atomic if implemented as a native store. So we replace them by an
575 // atomic swap, that can be implemented for example as a ldrex/strex on ARM
576 // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
577 // It is the responsibility of the target to only signal expansion via
578 // shouldExpandAtomicRMW in cases where this is required and possible.
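 // Illustratively: "store atomic i64 %v, ptr %p seq_cst, align 8" on a
 // 32-bit ARM-like target becomes "atomicrmw xchg ptr %p, i64 %v seq_cst"
 // whose result is unused, and that xchg is then expanded below.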
579 ReplacementIRBuilder Builder(SI, *DL);
580 AtomicOrdering Ordering = SI->getOrdering();
 581 assert(Ordering != AtomicOrdering::NotAtomic);
 582 AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
 583 ? AtomicOrdering::Monotonic
 584 : Ordering;
585 AtomicRMWInst *AI = Builder.CreateAtomicRMW(
586 AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
587 SI->getAlign(), RMWOrdering);
588 SI->eraseFromParent();
589
590 // Now we have an appropriate swap instruction, lower it as usual.
591 tryExpandAtomicRMW(AI);
592}
593
594static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
 595 Value *Loaded, Value *NewVal, Align AddrAlign,
596 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
597 Value *&Success, Value *&NewLoaded) {
598 Type *OrigTy = NewVal->getType();
599
600 // This code can go away when cmpxchg supports FP and vector types.
601 assert(!OrigTy->isPointerTy());
602 bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isVectorTy();
603 if (NeedBitcast) {
604 IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
605 NewVal = Builder.CreateBitCast(NewVal, IntTy);
606 Loaded = Builder.CreateBitCast(Loaded, IntTy);
607 }
608
609 Value *Pair = Builder.CreateAtomicCmpXchg(
 610 Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
 611 AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
612 Success = Builder.CreateExtractValue(Pair, 1, "success");
613 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
614
615 if (NeedBitcast)
616 NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
617}
618
619bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
620 LLVMContext &Ctx = AI->getModule()->getContext();
621 TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
622 switch (Kind) {
 623 case TargetLoweringBase::AtomicExpansionKind::None:
 624 return false;
 625 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
 626 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
627 unsigned ValueSize = getAtomicOpSize(AI);
628 if (ValueSize < MinCASSize) {
 629 expandPartwordAtomicRMW(AI,
 630 TargetLoweringBase::AtomicExpansionKind::LLSC);
 631 } else {
632 auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
633 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
634 AI->getValOperand());
635 };
636 expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
637 AI->getAlign(), AI->getOrdering(), PerformOp);
638 }
639 return true;
640 }
 641 case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
 642 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
643 unsigned ValueSize = getAtomicOpSize(AI);
644 if (ValueSize < MinCASSize) {
 645 expandPartwordAtomicRMW(AI,
 646 TargetLoweringBase::AtomicExpansionKind::CmpXChg);
 647 } else {
 648 SmallVector<StringRef> SSNs;
 649 Ctx.getSyncScopeNames(SSNs);
650 auto MemScope = SSNs[AI->getSyncScopeID()].empty()
651 ? "system"
 652 : SSNs[AI->getSyncScopeID()];
 653 OptimizationRemarkEmitter ORE(AI->getFunction());
 654 ORE.emit([&]() {
655 return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
656 << "A compare and swap loop was generated for an atomic "
657 << AI->getOperationName(AI->getOperation()) << " operation at "
658 << MemScope << " memory scope";
 659 });
 660 expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
 661 }
662 return true;
 663 }
 664 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
 665 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
666 unsigned ValueSize = getAtomicOpSize(AI);
 667 if (ValueSize < MinCASSize) {
 668 AtomicRMWInst::BinOp Op = AI->getOperation();
 669 // Widen And/Or/Xor and give the target another chance at expanding it.
 670 if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
 671 Op == AtomicRMWInst::And) {
 672 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
673 return true;
674 }
675 }
676 expandAtomicRMWToMaskedIntrinsic(AI);
677 return true;
 678 }
 679 case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
 680 TLI->emitBitTestAtomicRMWIntrinsic(AI);
681 return true;
 682 }
 683 case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: {
 684 TLI->emitCmpArithAtomicRMWIntrinsic(AI);
685 return true;
 686 }
 687 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
 688 return lowerAtomicRMWInst(AI);
 689 case TargetLoweringBase::AtomicExpansionKind::Expand:
 690 TLI->emitExpandAtomicRMW(AI);
691 return true;
692 default:
693 llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
694 }
695}
696
697namespace {
698
699struct PartwordMaskValues {
700 // These three fields are guaranteed to be set by createMaskInstrs.
701 Type *WordType = nullptr;
702 Type *ValueType = nullptr;
703 Type *IntValueType = nullptr;
704 Value *AlignedAddr = nullptr;
705 Align AlignedAddrAlignment;
706 // The remaining fields can be null.
707 Value *ShiftAmt = nullptr;
708 Value *Mask = nullptr;
709 Value *Inv_Mask = nullptr;
710};
711
712LLVM_ATTRIBUTE_UNUSED
713raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
714 auto PrintObj = [&O](auto *V) {
715 if (V)
716 O << *V;
717 else
718 O << "nullptr";
719 O << '\n';
720 };
721 O << "PartwordMaskValues {\n";
722 O << " WordType: ";
723 PrintObj(PMV.WordType);
724 O << " ValueType: ";
725 PrintObj(PMV.ValueType);
726 O << " AlignedAddr: ";
727 PrintObj(PMV.AlignedAddr);
728 O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
729 O << " ShiftAmt: ";
730 PrintObj(PMV.ShiftAmt);
731 O << " Mask: ";
732 PrintObj(PMV.Mask);
733 O << " Inv_Mask: ";
734 PrintObj(PMV.Inv_Mask);
735 O << "}\n";
736 return O;
737}
738
739} // end anonymous namespace
740
741/// This is a helper function which builds instructions to provide
742/// values necessary for partword atomic operations. It takes an
743/// incoming address, Addr, and ValueType, and constructs the address,
744/// shift-amounts and masks needed to work with a larger value of size
745/// WordSize.
746///
747/// AlignedAddr: Addr rounded down to a multiple of WordSize
748///
749/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
750/// from AlignAddr for it to have the same value as if
751/// ValueType was loaded from Addr.
752///
753/// Mask: Value to mask with the value loaded from AlignAddr to
754/// include only the part that would've been loaded from Addr.
755///
756/// Inv_Mask: The inverse of Mask.
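///
/// Worked example (illustrative): for an i8 at %addr with (%addr & 3) == 2 on
/// a little-endian target whose minimum cmpxchg width is 32 bits:
///   AlignedAddr = %addr & ~3, ShiftAmt = 16,
///   Mask = 0x00FF0000, Inv_Mask = 0xFF00FFFF.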
757static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
 758 Instruction *I, Type *ValueType,
 759 Value *Addr, Align AddrAlign,
760 unsigned MinWordSize) {
761 PartwordMaskValues PMV;
762
763 Module *M = I->getModule();
764 LLVMContext &Ctx = M->getContext();
765 const DataLayout &DL = M->getDataLayout();
766 unsigned ValueSize = DL.getTypeStoreSize(ValueType);
767
768 PMV.ValueType = PMV.IntValueType = ValueType;
769 if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy())
770 PMV.IntValueType =
771 Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());
772
773 PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
774 : ValueType;
775 if (PMV.ValueType == PMV.WordType) {
776 PMV.AlignedAddr = Addr;
777 PMV.AlignedAddrAlignment = AddrAlign;
778 PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
779 PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
780 return PMV;
781 }
782
783 PMV.AlignedAddrAlignment = Align(MinWordSize);
784
785 assert(ValueSize < MinWordSize);
786
787 PointerType *PtrTy = cast<PointerType>(Addr->getType());
788 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
789 Value *PtrLSB;
790
791 if (AddrAlign < MinWordSize) {
792 PMV.AlignedAddr = Builder.CreateIntrinsic(
793 Intrinsic::ptrmask, {PtrTy, IntTy},
794 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
795 "AlignedAddr");
796
797 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
798 PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
799 } else {
800 // If the alignment is high enough, the LSB are known 0.
801 PMV.AlignedAddr = Addr;
802 PtrLSB = ConstantInt::getNullValue(IntTy);
803 }
804
805 if (DL.isLittleEndian()) {
806 // turn bytes into bits
807 PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
808 } else {
809 // turn bytes into bits, and count from the other side.
810 PMV.ShiftAmt = Builder.CreateShl(
811 Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
812 }
813
814 PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
815 PMV.Mask = Builder.CreateShl(
816 ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
817 "Mask");
818
819 PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
820
821 return PMV;
822}
823
824static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
825 const PartwordMaskValues &PMV) {
826 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
827 if (PMV.WordType == PMV.ValueType)
828 return WideWord;
829
830 Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
831 Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
832 return Builder.CreateBitCast(Trunc, PMV.ValueType);
833}
834
835static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
836 Value *Updated, const PartwordMaskValues &PMV) {
837 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
838 assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
839 if (PMV.WordType == PMV.ValueType)
840 return Updated;
841
842 Updated = Builder.CreateBitCast(Updated, PMV.IntValueType);
843
844 Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
845 Value *Shift =
846 Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
847 Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
848 Value *Or = Builder.CreateOr(And, Shift, "inserted");
849 return Or;
850}
851
852/// Emit IR to implement a masked version of a given atomicrmw
853/// operation. (That is, only the bits under the Mask should be
854/// affected by the operation)
855static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
 856 IRBuilderBase &Builder, Value *Loaded,
857 Value *Shifted_Inc, Value *Inc,
858 const PartwordMaskValues &PMV) {
859 // TODO: update to use
860 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
861 // to merge bits from two values without requiring PMV.Inv_Mask.
862 switch (Op) {
863 case AtomicRMWInst::Xchg: {
864 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
865 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
866 return FinalVal;
867 }
871 llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
874 case AtomicRMWInst::Nand: {
875 // The other arithmetic ops need to be masked into place.
876 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
877 Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
878 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
879 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
880 return FinalVal;
 881 }
 882 case AtomicRMWInst::Max:
 883 case AtomicRMWInst::Min:
 884 case AtomicRMWInst::UMax:
 885 case AtomicRMWInst::UMin:
 886 case AtomicRMWInst::FAdd:
 887 case AtomicRMWInst::FSub:
 888 case AtomicRMWInst::FMin:
 889 case AtomicRMWInst::FMax:
 890 case AtomicRMWInst::UIncWrap:
 891 case AtomicRMWInst::UDecWrap: {
 892 // Finally, other ops will operate on the full value, so truncate down to
893 // the original size, and expand out again after doing the
894 // operation. Bitcasts will be inserted for FP values.
895 Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
896 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
897 Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
898 return FinalVal;
899 }
900 default:
901 llvm_unreachable("Unknown atomic op");
902 }
903}
904
905/// Expand a sub-word atomicrmw operation into an appropriate
906/// word-sized operation.
907///
908/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
909/// way as a typical atomicrmw expansion. The only difference here is
910/// that the operation inside of the loop may operate upon only a
911/// part of the value.
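///
/// Illustrative input: "atomicrmw add ptr %p, i8 1 monotonic" on a target
/// whose minimum cmpxchg width is 32 bits; the loop produced below operates
/// on the containing i32 word, and only the byte selected by Mask changes.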
912 void AtomicExpandImpl::expandPartwordAtomicRMW(
 913 AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
 914 // Widen And/Or/Xor and give the target another chance at expanding it.
 915 AtomicRMWInst::BinOp Op = AI->getOperation();
 916 if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
 917 Op == AtomicRMWInst::And) {
 918 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
919 return;
920 }
921 AtomicOrdering MemOpOrder = AI->getOrdering();
922 SyncScope::ID SSID = AI->getSyncScopeID();
923
924 ReplacementIRBuilder Builder(AI, *DL);
925
926 PartwordMaskValues PMV =
927 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
928 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
929
 930 Value *ValOperand_Shifted = nullptr;
 931 if (Op == AtomicRMWInst::Xchg || Op == AtomicRMWInst::Add ||
 932 Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Nand) {
 933 Value *ValOp = Builder.CreateBitCast(AI->getValOperand(), PMV.IntValueType);
934 ValOperand_Shifted =
935 Builder.CreateShl(Builder.CreateZExt(ValOp, PMV.WordType), PMV.ShiftAmt,
936 "ValOperand_Shifted");
937 }
938
939 auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
940 return performMaskedAtomicOp(Op, Builder, Loaded, ValOperand_Shifted,
941 AI->getValOperand(), PMV);
942 };
943
 944 Value *OldResult;
 945 if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
 946 OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
947 PMV.AlignedAddrAlignment, MemOpOrder, SSID,
948 PerformPartwordOp, createCmpXchgInstFun);
 949 } else {
 950 assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
 951 OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
952 PMV.AlignedAddrAlignment, MemOpOrder,
953 PerformPartwordOp);
954 }
955
956 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
957 AI->replaceAllUsesWith(FinalOldResult);
958 AI->eraseFromParent();
959}
960
961/// Copy metadata that's safe to preserve when widening atomics.
962static void copyMetadataForAtomic(Instruction &Dest,
 963 const Instruction &Source) {
 964 SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
 965 Source.getAllMetadata(MD);
966 LLVMContext &Ctx = Dest.getContext();
967 MDBuilder MDB(Ctx);
968
969 for (auto [ID, N] : MD) {
970 switch (ID) {
971 case LLVMContext::MD_dbg:
972 case LLVMContext::MD_tbaa:
973 case LLVMContext::MD_tbaa_struct:
974 case LLVMContext::MD_alias_scope:
975 case LLVMContext::MD_noalias:
976 case LLVMContext::MD_access_group:
977 case LLVMContext::MD_mmra:
978 Dest.setMetadata(ID, N);
979 break;
980 default:
981 if (ID == Ctx.getMDKindID("amdgpu.no.remote.memory"))
982 Dest.setMetadata(ID, N);
983 else if (ID == Ctx.getMDKindID("amdgpu.no.fine.grained.memory"))
984 Dest.setMetadata(ID, N);
985
986 break;
987 }
988 }
989}
990
991// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
992AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
993 ReplacementIRBuilder Builder(AI, *DL);
 994 AtomicRMWInst::BinOp Op = AI->getOperation();
 995
 996 assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
 997 Op == AtomicRMWInst::And) &&
 998 "Unable to widen operation");
999
1000 PartwordMaskValues PMV =
1001 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1002 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1003
1004 Value *ValOperand_Shifted =
1005 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
1006 PMV.ShiftAmt, "ValOperand_Shifted");
1007
1008 Value *NewOperand;
1009
1010 if (Op == AtomicRMWInst::And)
1011 NewOperand =
1012 Builder.CreateOr(ValOperand_Shifted, PMV.Inv_Mask, "AndOperand");
1013 else
1014 NewOperand = ValOperand_Shifted;
1015
1016 AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
1017 Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
1018 AI->getOrdering(), AI->getSyncScopeID());
1019
1020 copyMetadataForAtomic(*NewAI, *AI);
1021
1022 Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
1023 AI->replaceAllUsesWith(FinalOldResult);
1024 AI->eraseFromParent();
1025 return NewAI;
1026}
1027
1028bool AtomicExpandImpl::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
1029 // The basic idea here is that we're expanding a cmpxchg of a
1030 // smaller memory size up to a word-sized cmpxchg. To do this, we
1031 // need to add a retry-loop for strong cmpxchg, so that
1032 // modifications to other parts of the word don't cause a spurious
1033 // failure.
1034
1035 // This generates code like the following:
1036 // [[Setup mask values PMV.*]]
1037 // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
1038 // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
1039 // %InitLoaded = load i32* %addr
1040 // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
1041 // br partword.cmpxchg.loop
1042 // partword.cmpxchg.loop:
1043 // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
1044 // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
1045 // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
1046 // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
1047 // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
1048 // i32 %FullWord_NewVal success_ordering failure_ordering
1049 // %OldVal = extractvalue { i32, i1 } %NewCI, 0
1050 // %Success = extractvalue { i32, i1 } %NewCI, 1
1051 // br i1 %Success, label %partword.cmpxchg.end,
1052 // label %partword.cmpxchg.failure
1053 // partword.cmpxchg.failure:
1054 // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
1055 // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
1056 // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
1057 // label %partword.cmpxchg.end
1058 // partword.cmpxchg.end:
1059 // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
1060 // %FinalOldVal = trunc i32 %tmp1 to i8
1061 // %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
1062 // %Res = insertvalue { i8, i1 } %25, i1 %Success, 1
1063
1064 Value *Addr = CI->getPointerOperand();
1065 Value *Cmp = CI->getCompareOperand();
1066 Value *NewVal = CI->getNewValOperand();
1067
1068 BasicBlock *BB = CI->getParent();
1069 Function *F = BB->getParent();
1070 ReplacementIRBuilder Builder(CI, *DL);
1071 LLVMContext &Ctx = Builder.getContext();
1072
1073 BasicBlock *EndBB =
1074 BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
1075 auto FailureBB =
1076 BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
1077 auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
1078
1079 // The split call above "helpfully" added a branch at the end of BB
1080 // (to the wrong place).
1081 std::prev(BB->end())->eraseFromParent();
1082 Builder.SetInsertPoint(BB);
1083
1084 PartwordMaskValues PMV =
1085 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1086 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1087
1088 // Shift the incoming values over, into the right location in the word.
1089 Value *NewVal_Shifted =
1090 Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
1091 Value *Cmp_Shifted =
1092 Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
1093
1094 // Load the entire current word, and mask into place the expected and new
1095 // values
1096 LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
1097 InitLoaded->setVolatile(CI->isVolatile());
1098 Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
1099 Builder.CreateBr(LoopBB);
1100
1101 // partword.cmpxchg.loop:
1102 Builder.SetInsertPoint(LoopBB);
1103 PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
1104 Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
1105
1106 // Mask/Or the expected and new values into place in the loaded word.
1107 Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
1108 Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
1109 AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
 1110 PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
 1111 CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID());
1112 NewCI->setVolatile(CI->isVolatile());
1113 // When we're building a strong cmpxchg, we need a loop, so you
1114 // might think we could use a weak cmpxchg inside. But, using strong
1115 // allows the below comparison for ShouldContinue, and we're
1116 // expecting the underlying cmpxchg to be a machine instruction,
1117 // which is strong anyways.
1118 NewCI->setWeak(CI->isWeak());
1119
1120 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1121 Value *Success = Builder.CreateExtractValue(NewCI, 1);
1122
1123 if (CI->isWeak())
1124 Builder.CreateBr(EndBB);
1125 else
1126 Builder.CreateCondBr(Success, EndBB, FailureBB);
1127
1128 // partword.cmpxchg.failure:
1129 Builder.SetInsertPoint(FailureBB);
1130 // Upon failure, verify that the masked-out part of the loaded value
1131 // has been modified. If it didn't, abort the cmpxchg, since the
1132 // masked-in part must've.
1133 Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
1134 Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
1135 Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
1136
1137 // Add the second value to the phi from above
1138 Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
1139
1140 // partword.cmpxchg.end:
1141 Builder.SetInsertPoint(CI);
1142
1143 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1144 Value *Res = PoisonValue::get(CI->getType());
1145 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1146 Res = Builder.CreateInsertValue(Res, Success, 1);
1147
1148 CI->replaceAllUsesWith(Res);
1149 CI->eraseFromParent();
1150 return true;
1151}
1152
1153void AtomicExpandImpl::expandAtomicOpToLLSC(
1154 Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
1155 AtomicOrdering MemOpOrder,
1156 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1157 ReplacementIRBuilder Builder(I, *DL);
1158 Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
1159 MemOpOrder, PerformOp);
1160
1161 I->replaceAllUsesWith(Loaded);
1162 I->eraseFromParent();
1163}
1164
1165void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
1166 ReplacementIRBuilder Builder(AI, *DL);
1167
1168 PartwordMaskValues PMV =
1169 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1170 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1171
1172 // The value operand must be sign-extended for signed min/max so that the
1173 // target's signed comparison instructions can be used. Otherwise, just
1174 // zero-ext.
1175 Instruction::CastOps CastOp = Instruction::ZExt;
1176 AtomicRMWInst::BinOp RMWOp = AI->getOperation();
1177 if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
1178 CastOp = Instruction::SExt;
1179
1180 Value *ValOperand_Shifted = Builder.CreateShl(
1181 Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
1182 PMV.ShiftAmt, "ValOperand_Shifted");
1183 Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
1184 Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
1185 AI->getOrdering());
1186 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1187 AI->replaceAllUsesWith(FinalOldResult);
1188 AI->eraseFromParent();
1189}
1190
1191void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic(
1192 AtomicCmpXchgInst *CI) {
1193 ReplacementIRBuilder Builder(CI, *DL);
1194
1195 PartwordMaskValues PMV = createMaskInstrs(
1196 Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
1197 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1198
1199 Value *CmpVal_Shifted = Builder.CreateShl(
1200 Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
1201 "CmpVal_Shifted");
1202 Value *NewVal_Shifted = Builder.CreateShl(
1203 Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
1204 "NewVal_Shifted");
1205 Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
1206 Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
1207 CI->getMergedOrdering());
1208 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1209 Value *Res = PoisonValue::get(CI->getType());
1210 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1211 Value *Success = Builder.CreateICmpEQ(
1212 CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
1213 Res = Builder.CreateInsertValue(Res, Success, 1);
1214
1215 CI->replaceAllUsesWith(Res);
1216 CI->eraseFromParent();
1217}
1218
1219Value *AtomicExpandImpl::insertRMWLLSCLoop(
1220 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1221 AtomicOrdering MemOpOrder,
1222 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1223 LLVMContext &Ctx = Builder.getContext();
1224 BasicBlock *BB = Builder.GetInsertBlock();
1225 Function *F = BB->getParent();
1226
1227 assert(AddrAlign >=
1228 F->getDataLayout().getTypeStoreSize(ResultTy) &&
1229 "Expected at least natural alignment at this point.");
1230
1231 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1232 //
1233 // The standard expansion we produce is:
1234 // [...]
1235 // atomicrmw.start:
1236 // %loaded = @load.linked(%addr)
1237 // %new = some_op iN %loaded, %incr
1238 // %stored = @store_conditional(%new, %addr)
1239 // %try_again = icmp i32 ne %stored, 0
1240 // br i1 %try_again, label %loop, label %atomicrmw.end
1241 // atomicrmw.end:
1242 // [...]
1243 BasicBlock *ExitBB =
1244 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1245 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1246
1247 // The split call above "helpfully" added a branch at the end of BB (to the
1248 // wrong place).
1249 std::prev(BB->end())->eraseFromParent();
1250 Builder.SetInsertPoint(BB);
1251 Builder.CreateBr(LoopBB);
1252
1253 // Start the main loop block now that we've taken care of the preliminaries.
1254 Builder.SetInsertPoint(LoopBB);
1255 Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);
1256
1257 Value *NewVal = PerformOp(Builder, Loaded);
1258
1259 Value *StoreSuccess =
1260 TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
1261 Value *TryAgain = Builder.CreateICmpNE(
1262 StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
1263 Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
1264
1265 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1266 return Loaded;
1267}
1268
1269/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
1270/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
1271/// IR. As a migration step, we convert back to what used to be the standard
1272/// way to represent a pointer cmpxchg so that we can update backends one by
1273/// one.
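///
/// Roughly (illustrative, assuming 64-bit pointers):
/// "cmpxchg ptr %p, ptr %old, ptr %new seq_cst seq_cst" becomes a cmpxchg on
/// i64 values produced by ptrtoint, with the loaded result converted back to
/// a pointer via inttoptr.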
1274AtomicCmpXchgInst *
1275AtomicExpandImpl::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1276 auto *M = CI->getModule();
1277 Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
1278 M->getDataLayout());
1279
1280 ReplacementIRBuilder Builder(CI, *DL);
1281
1282 Value *Addr = CI->getPointerOperand();
1283
1284 Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
1285 Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
1286
1287 auto *NewCI = Builder.CreateAtomicCmpXchg(
1288 Addr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
1289 CI->getFailureOrdering(), CI->getSyncScopeID());
1290 NewCI->setVolatile(CI->isVolatile());
1291 NewCI->setWeak(CI->isWeak());
1292 LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1293
1294 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1295 Value *Succ = Builder.CreateExtractValue(NewCI, 1);
1296
1297 OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
1298
1299 Value *Res = PoisonValue::get(CI->getType());
1300 Res = Builder.CreateInsertValue(Res, OldVal, 0);
1301 Res = Builder.CreateInsertValue(Res, Succ, 1);
1302
1303 CI->replaceAllUsesWith(Res);
1304 CI->eraseFromParent();
1305 return NewCI;
1306}
1307
1308bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1309 AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
1310 AtomicOrdering FailureOrder = CI->getFailureOrdering();
1311 Value *Addr = CI->getPointerOperand();
1312 BasicBlock *BB = CI->getParent();
1313 Function *F = BB->getParent();
1314 LLVMContext &Ctx = F->getContext();
1315 // If shouldInsertFencesForAtomic() returns true, then the target does not
1316 // want to deal with memory orders, and emitLeading/TrailingFence should take
1317 // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
1318 // should preserve the ordering.
1319 bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
1320 AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
 1321 ? AtomicOrdering::Monotonic
 1322 : CI->getMergedOrdering();
1323
1324 // In implementations which use a barrier to achieve release semantics, we can
1325 // delay emitting this barrier until we know a store is actually going to be
1326 // attempted. The cost of this delay is that we need 2 copies of the block
1327 // emitting the load-linked, affecting code size.
1328 //
1329 // Ideally, this logic would be unconditional except for the minsize check
1330 // since in other cases the extra blocks naturally collapse down to the
1331 // minimal loop. Unfortunately, this puts too much stress on later
1332 // optimisations so we avoid emitting the extra logic in those cases too.
1333 bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
1334 SuccessOrder != AtomicOrdering::Monotonic &&
1335 SuccessOrder != AtomicOrdering::Acquire &&
1336 !F->hasMinSize();
1337
1338 // There's no overhead for sinking the release barrier in a weak cmpxchg, so
1339 // do it even on minsize.
1340 bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();
1341
1342 // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
1343 //
1344 // The full expansion we produce is:
1345 // [...]
1346 // %aligned.addr = ...
1347 // cmpxchg.start:
1348 // %unreleasedload = @load.linked(%aligned.addr)
1349 // %unreleasedload.extract = extract value from %unreleasedload
1350 // %should_store = icmp eq %unreleasedload.extract, %desired
1351 // br i1 %should_store, label %cmpxchg.releasingstore,
1352 // label %cmpxchg.nostore
1353 // cmpxchg.releasingstore:
1354 // fence?
1355 // br label cmpxchg.trystore
1356 // cmpxchg.trystore:
1357 // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
1358 // [%releasedload, %cmpxchg.releasedload]
1359 // %updated.new = insert %new into %loaded.trystore
1360 // %stored = @store_conditional(%updated.new, %aligned.addr)
1361 // %success = icmp eq i32 %stored, 0
1362 // br i1 %success, label %cmpxchg.success,
1363 // label %cmpxchg.releasedload/%cmpxchg.failure
1364 // cmpxchg.releasedload:
1365 // %releasedload = @load.linked(%aligned.addr)
1366 // %releasedload.extract = extract value from %releasedload
1367 // %should_store = icmp eq %releasedload.extract, %desired
1368 // br i1 %should_store, label %cmpxchg.trystore,
1369 // label %cmpxchg.failure
1370 // cmpxchg.success:
1371 // fence?
1372 // br label %cmpxchg.end
1373 // cmpxchg.nostore:
1374 // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
1375 // [%releasedload,
1376 // %cmpxchg.releasedload/%cmpxchg.trystore]
1377 // @load_linked_fail_balance()?
1378 // br label %cmpxchg.failure
1379 // cmpxchg.failure:
1380 // fence?
1381 // br label %cmpxchg.end
1382 // cmpxchg.end:
1383 // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
1384 // [%loaded.trystore, %cmpxchg.trystore]
1385 // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
1386 // %loaded = extract value from %loaded.exit
1387 // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
1388 // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
1389 // [...]
1390 BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
1391 auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
1392 auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
1393 auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
1394 auto ReleasedLoadBB =
1395 BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
1396 auto TryStoreBB =
1397 BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
1398 auto ReleasingStoreBB =
1399 BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
1400 auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
1401
1402 ReplacementIRBuilder Builder(CI, *DL);
1403
1404 // The split call above "helpfully" added a branch at the end of BB (to the
1405 // wrong place), but we might want a fence too. It's easiest to just remove
1406 // the branch entirely.
1407 std::prev(BB->end())->eraseFromParent();
1408 Builder.SetInsertPoint(BB);
1409 if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1410 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1411
1412 PartwordMaskValues PMV =
1413 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1414 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1415 Builder.CreateBr(StartBB);
1416
1417 // Start the main loop block now that we've taken care of the preliminaries.
1418 Builder.SetInsertPoint(StartBB);
1419 Value *UnreleasedLoad =
1420 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1421 Value *UnreleasedLoadExtract =
1422 extractMaskedValue(Builder, UnreleasedLoad, PMV);
1423 Value *ShouldStore = Builder.CreateICmpEQ(
1424 UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");
1425
1426 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1427 // jump straight past that fence instruction (if it exists).
1428 Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);
1429
1430 Builder.SetInsertPoint(ReleasingStoreBB);
1431 if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1432 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1433 Builder.CreateBr(TryStoreBB);
1434
1435 Builder.SetInsertPoint(TryStoreBB);
1436 PHINode *LoadedTryStore =
1437 Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
1438 LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
1439 Value *NewValueInsert =
1440 insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
1441 Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
1442 PMV.AlignedAddr, MemOpOrder);
1443 StoreSuccess = Builder.CreateICmpEQ(
1444 StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
1445 BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1446 Builder.CreateCondBr(StoreSuccess, SuccessBB,
1447 CI->isWeak() ? FailureBB : RetryBB);
1448
1449 Builder.SetInsertPoint(ReleasedLoadBB);
1450 Value *SecondLoad;
1451 if (HasReleasedLoadBB) {
1452 SecondLoad =
1453 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1454 Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
1455 ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
1456 CI->getCompareOperand(), "should_store");
1457
1458 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1459 // jump straight past that fence instruction (if it exists).
1460 Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
1461 // Update PHI node in TryStoreBB.
1462 LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
1463 } else
1464 Builder.CreateUnreachable();
1465
1466 // Make sure later instructions don't get reordered with a fence if
1467 // necessary.
1468 Builder.SetInsertPoint(SuccessBB);
1469 if (ShouldInsertFencesForAtomic ||
1470 TLI->shouldInsertTrailingFenceForAtomicStore(CI))
1471 TLI->emitTrailingFence(Builder, CI, SuccessOrder);
1472 Builder.CreateBr(ExitBB);
1473
1474 Builder.SetInsertPoint(NoStoreBB);
1475 PHINode *LoadedNoStore =
1476 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
1477 LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
1478 if (HasReleasedLoadBB)
1479 LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);
1480
1481 // In the failing case, where we don't execute the store-conditional, the
1482 // target might want to balance out the load-linked with a dedicated
1483 // instruction (e.g., on ARM, clearing the exclusive monitor).
1484 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
1485 Builder.CreateBr(FailureBB);
1486
1487 Builder.SetInsertPoint(FailureBB);
1488 PHINode *LoadedFailure =
1489 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
1490 LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
1491 if (CI->isWeak())
1492 LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
1493 if (ShouldInsertFencesForAtomic)
1494 TLI->emitTrailingFence(Builder, CI, FailureOrder);
1495 Builder.CreateBr(ExitBB);
1496
1497 // Finally, we have control-flow based knowledge of whether the cmpxchg
1498 // succeeded or not. We expose this to later passes by converting any
1499 // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
1500 // PHI.
1501 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1502 PHINode *LoadedExit =
1503 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
1504 LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
1505 LoadedExit->addIncoming(LoadedFailure, FailureBB);
1506 PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
1507 Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
1508 Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
1509
1510 // This is the "exit value" from the cmpxchg expansion. It may be of
1511 // a type wider than the one in the cmpxchg instruction.
1512 Value *LoadedFull = LoadedExit;
1513
1514 Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
1515 Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);
1516
1517 // Look for any users of the cmpxchg that are just comparing the loaded value
1518 // against the desired one, and replace them with the CFG-derived version.
 1519 SmallVector<ExtractValueInst *, 2> PrunedInsts;
 1520 for (auto *User : CI->users()) {
1521 ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
1522 if (!EV)
1523 continue;
1524
1525 assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
1526 "weird extraction from { iN, i1 }");
1527
1528 if (EV->getIndices()[0] == 0)
1529 EV->replaceAllUsesWith(Loaded);
1530 else
 1531 EV->replaceAllUsesWith(Success);
 1532
1533 PrunedInsts.push_back(EV);
1534 }
1535
1536 // We can remove the instructions now we're no longer iterating through them.
1537 for (auto *EV : PrunedInsts)
1538 EV->eraseFromParent();
1539
1540 if (!CI->use_empty()) {
1541 // Some use of the full struct return that we don't understand has happened,
1542 // so we've got to reconstruct it properly.
1543 Value *Res;
1544 Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0);
1545 Res = Builder.CreateInsertValue(Res, Success, 1);
1546
1547 CI->replaceAllUsesWith(Res);
1548 }
1549
1550 CI->eraseFromParent();
1551 return true;
1552}
1553
1554bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
1555 auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
1556 if (!C)
1557 return false;
1558
 1559 AtomicRMWInst::BinOp Op = RMWI->getOperation();
 1560 switch (Op) {
1561 case AtomicRMWInst::Add:
1562 case AtomicRMWInst::Sub:
1563 case AtomicRMWInst::Or:
1564 case AtomicRMWInst::Xor:
1565 return C->isZero();
1566 case AtomicRMWInst::And:
1567 return C->isMinusOne();
1568 // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
1569 default:
1570 return false;
1571 }
1572}
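// Illustrative example of the above: "atomicrmw or ptr %p, i32 0 acquire" is
// idempotent, and a target's lowerIdempotentRMWIntoFencedLoad may rewrite it
// as "%v = load atomic i32, ptr %p monotonic, align 4" plus whatever fences
// the target requires (the exact result is target-dependent).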
1573
1574bool AtomicExpandImpl::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
1575 if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1576 tryExpandAtomicLoad(ResultingLoad);
1577 return true;
1578 }
1579 return false;
1580}
1581
1582Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
1583 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1584 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
1585 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
1586 CreateCmpXchgInstFun CreateCmpXchg) {
1587 LLVMContext &Ctx = Builder.getContext();
1588 BasicBlock *BB = Builder.GetInsertBlock();
1589 Function *F = BB->getParent();
1590
1591 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1592 //
1593 // The standard expansion we produce is:
1594 // [...]
1595 // %init_loaded = load atomic iN* %addr
1596 // br label %loop
1597 // loop:
1598 // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1599 // %new = some_op iN %loaded, %incr
1600 // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1601 // %new_loaded = extractvalue { iN, i1 } %pair, 0
1602 // %success = extractvalue { iN, i1 } %pair, 1
1603 // br i1 %success, label %atomicrmw.end, label %loop
1604 // atomicrmw.end:
1605 // [...]
1606 BasicBlock *ExitBB =
1607 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1608 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1609
1610 // The split call above "helpfully" added a branch at the end of BB (to the
1611 // wrong place), but we want a load. It's easiest to just remove
1612 // the branch entirely.
1613 std::prev(BB->end())->eraseFromParent();
1614 Builder.SetInsertPoint(BB);
1615 LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
1616 Builder.CreateBr(LoopBB);
1617
1618 // Start the main loop block now that we've taken care of the preliminaries.
1619 Builder.SetInsertPoint(LoopBB);
1620 PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
1621 Loaded->addIncoming(InitLoaded, BB);
1622
1623 Value *NewVal = PerformOp(Builder, Loaded);
1624
1625 Value *NewLoaded = nullptr;
1626 Value *Success = nullptr;
1627
1628 CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
1629 MemOpOrder == AtomicOrdering::Unordered
1630 ? AtomicOrdering::Monotonic
1631 : MemOpOrder,
1632 SSID, Success, NewLoaded);
1633 assert(Success && NewLoaded);
1634
1635 Loaded->addIncoming(NewLoaded, LoopBB);
1636
1637 Builder.CreateCondBr(Success, ExitBB, LoopBB);
1638
1639 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1640 return NewLoaded;
1641}
1642
1643bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1644 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1645 unsigned ValueSize = getAtomicOpSize(CI);
1646
1647 switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1648 default:
1649 llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
1650 case TargetLoweringBase::AtomicExpansionKind::None:
1651 if (ValueSize < MinCASSize)
1652 return expandPartwordCmpXchg(CI);
1653 return false;
1654 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
1655 return expandAtomicCmpXchg(CI);
1656 }
1657 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
1658 expandAtomicCmpXchgToMaskedIntrinsic(CI);
1659 return true;
1660 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
1661 return lowerAtomicCmpXchgInst(CI);
1662 }
1663}
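// For example, on a target whose minimum cmpxchg width is 32 bits
// (getMinCmpXchgSizeInBits() == 32), an i8 or i16 cmpxchg is routed through
// expandPartwordCmpXchg even when the target reports AtomicExpansionKind::None
// for it.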
1664
1665// Note: This function is exposed externally by AtomicExpandUtils.h
1666bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
1667 CreateCmpXchgInstFun CreateCmpXchg) {
1668 ReplacementIRBuilder Builder(AI, AI->getDataLayout());
1669 Builder.setIsFPConstrained(
1670 AI->getFunction()->hasFnAttribute(Attribute::StrictFP));
1671
1672 // FIXME: If FP exceptions are observable, we should force them off for the
1673 // loop for the FP atomics.
1674 Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
1675 Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
1676 AI->getOrdering(), AI->getSyncScopeID(),
1677 [&](IRBuilderBase &Builder, Value *Loaded) {
1678 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
1679 AI->getValOperand());
1680 },
1681 CreateCmpXchg);
1682
1683 AI->replaceAllUsesWith(Loaded);
1684 AI->eraseFromParent();
1685 return true;
1686}
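// For example, "atomicrmw nand ptr %p, i32 %v seq_cst" has no dedicated
// instruction on most targets; buildAtomicRMWValue computes
// %new = ~(%loaded & %v) inside the loop built by insertRMWCmpXchgLoop, and the
// cmpxchg at the bottom of that loop retries until the store succeeds.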
1687
1688// In order to use one of the sized library calls such as
1689// __atomic_fetch_add_4, the alignment must be sufficient, the size
1690// must be one of the potentially-specialized sizes, and the value
1691// type must actually exist in C on the target (otherwise, the
1692// function wouldn't actually be defined.)
1693static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
1694 const DataLayout &DL) {
1695 // TODO: "LargestSize" is an approximation for "largest type that
1696 // you can express in C". It seems to be the case that int128 is
1697 // supported on all 64-bit platforms, otherwise only up to 64-bit
1698 // integers are supported. If we get this wrong, then we'll try to
1699 // call a sized libcall that doesn't actually exist. There should
1700 // really be some more reliable way in LLVM of determining integer
1701 // sizes which are valid in the target's C ABI...
1702 unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1703 return Alignment >= Size &&
1704 (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1705 Size <= LargestSize;
1706}
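// For example, a naturally aligned i32 operation (Size == 4, Alignment >= 4)
// qualifies for __atomic_fetch_add_4 and friends, while an underaligned i32, or
// a 16-byte operation on a target whose largest legal integer type is narrower
// than 64 bits, must fall back to the generic, size_t-prefixed __atomic_*
// entry points handled below.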
1707
1708void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) {
1709 static const RTLIB::Libcall Libcalls[6] = {
1710 RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1711 RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1712 unsigned Size = getAtomicOpSize(I);
1713
1714 bool expanded = expandAtomicOpToLibcall(
1715 I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
1716 I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1717 if (!expanded)
1718 report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load");
1719}
1720
1721void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
1722 static const RTLIB::Libcall Libcalls[6] = {
1723 RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1724 RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1725 unsigned Size = getAtomicOpSize(I);
1726
1727 bool expanded = expandAtomicOpToLibcall(
1728 I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
1729 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1730 if (!expanded)
1731 report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store");
1732}
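// For example, a monotonic atomic store of a double that satisfies
// canUseSizedAtomicCall is lowered to __atomic_store_8, with the value bitcast
// to i64 as described in expandAtomicOpToLibcall below.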
1733
1734void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
1735 static const RTLIB::Libcall Libcalls[6] = {
1736 RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1737 RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1738 RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1739 unsigned Size = getAtomicOpSize(I);
1740
1741 bool expanded = expandAtomicOpToLibcall(
1742 I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
1743 I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
1744 Libcalls);
1745 if (!expanded)
1746 report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS");
1747}
1748
1749static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
1750 static const RTLIB::Libcall LibcallsXchg[6] = {
1751 RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1752 RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1753 RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1754 static const RTLIB::Libcall LibcallsAdd[6] = {
1755 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1756 RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1757 RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1758 static const RTLIB::Libcall LibcallsSub[6] = {
1759 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1760 RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1761 RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1762 static const RTLIB::Libcall LibcallsAnd[6] = {
1763 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1764 RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1765 RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1766 static const RTLIB::Libcall LibcallsOr[6] = {
1767 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1768 RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1769 RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1770 static const RTLIB::Libcall LibcallsXor[6] = {
1771 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1772 RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1773 RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1774 static const RTLIB::Libcall LibcallsNand[6] = {
1775 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1776 RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1777 RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1778
1779 switch (Op) {
1780 case AtomicRMWInst::BAD_BINOP:
1781 llvm_unreachable("Should not have BAD_BINOP.");
1782 case AtomicRMWInst::Xchg:
1783 return ArrayRef(LibcallsXchg);
1784 case AtomicRMWInst::Add:
1785 return ArrayRef(LibcallsAdd);
1786 case AtomicRMWInst::Sub:
1787 return ArrayRef(LibcallsSub);
1788 case AtomicRMWInst::And:
1789 return ArrayRef(LibcallsAnd);
1790 case AtomicRMWInst::Or:
1791 return ArrayRef(LibcallsOr);
1792 case AtomicRMWInst::Xor:
1793 return ArrayRef(LibcallsXor);
1794 case AtomicRMWInst::Nand:
1795 return ArrayRef(LibcallsNand);
1796 case AtomicRMWInst::Max:
1797 case AtomicRMWInst::Min:
1798 case AtomicRMWInst::UMax:
1799 case AtomicRMWInst::UMin:
1800 case AtomicRMWInst::FMax:
1801 case AtomicRMWInst::FMin:
1802 case AtomicRMWInst::FAdd:
1803 case AtomicRMWInst::FSub:
1804 case AtomicRMWInst::UIncWrap:
1805 case AtomicRMWInst::UDecWrap:
1806 // No atomic libcalls are available for max/min/umax/umin.
1807 return {};
1808 }
1809 llvm_unreachable("Unexpected AtomicRMW operation.");
1810}
1811
1812void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
1813 ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
1814
1815 unsigned Size = getAtomicOpSize(I);
1816
1817 bool Success = false;
1818 if (!Libcalls.empty())
1819 Success = expandAtomicOpToLibcall(
1820 I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
1821 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1822
1823 // The expansion failed: either there were no libcalls at all for
1824 // the operation (min/max), or there were only size-specialized
1825 // libcalls (add/sub/etc) and we needed a generic. So, expand to a
1826 // CAS libcall, via a CAS loop, instead.
1827 if (!Success) {
1828 expandAtomicRMWToCmpXchg(
1829 I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
1830 Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
1831 SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) {
1832 // Create the CAS instruction normally...
1833 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
1834 Addr, Loaded, NewVal, Alignment, MemOpOrder,
1835 AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
1836 Success = Builder.CreateExtractValue(Pair, 1, "success");
1837 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
1838
1839 // ...and then expand the CAS into a libcall.
1840 expandAtomicCASToLibcall(Pair);
1841 });
1842 }
1843}
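// For example, "atomicrmw max ptr %p, i32 %v" has no __atomic_fetch_max_N
// libcall (GetRMWLibcall returns an empty list above), so it is rewritten into
// the cmpxchg loop and each cmpxchg is in turn lowered through
// expandAtomicCASToLibcall to __atomic_compare_exchange_4 or the generic
// __atomic_compare_exchange.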
1844
1845// A helper routine for the above expandAtomic*ToLibcall functions.
1846//
1847// 'Libcalls' contains an array of enum values for the particular
1848// ATOMIC libcalls to be emitted. All of the other arguments besides
1849// 'I' are extracted from the Instruction subclass by the
1850// caller. Depending on the particular call, some will be null.
1851bool AtomicExpandImpl::expandAtomicOpToLibcall(
1852 Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
1853 Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
1854 AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
1855 assert(Libcalls.size() == 6);
1856
1857 LLVMContext &Ctx = I->getContext();
1858 Module *M = I->getModule();
1859 const DataLayout &DL = M->getDataLayout();
1860 IRBuilder<> Builder(I);
1861 IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
1862
1863 bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
1864 Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
1865
1866 const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
1867
1868 // TODO: the "order" argument type is "int", not int32. So
1869 // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
1870 ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
1871 assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
1872 Constant *OrderingVal =
1873 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
1874 Constant *Ordering2Val = nullptr;
1875 if (CASExpected) {
1876 assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
1877 Ordering2Val =
1878 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
1879 }
1880 bool HasResult = I->getType() != Type::getVoidTy(Ctx);
1881
1882 RTLIB::Libcall RTLibType;
1883 if (UseSizedLibcall) {
1884 switch (Size) {
1885 case 1:
1886 RTLibType = Libcalls[1];
1887 break;
1888 case 2:
1889 RTLibType = Libcalls[2];
1890 break;
1891 case 4:
1892 RTLibType = Libcalls[3];
1893 break;
1894 case 8:
1895 RTLibType = Libcalls[4];
1896 break;
1897 case 16:
1898 RTLibType = Libcalls[5];
1899 break;
1900 }
1901 } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
1902 RTLibType = Libcalls[0];
1903 } else {
1904 // Can't use sized function, and there's no generic for this
1905 // operation, so give up.
1906 return false;
1907 }
1908
1909 if (!TLI->getLibcallName(RTLibType)) {
1910 // This target does not implement the requested atomic libcall so give up.
1911 return false;
1912 }
1913
1914 // Build up the function call. There are two kinds. First, the sized
1915 // variants. These calls are going to be one of the following (with
1916 // N=1,2,4,8,16):
1917 // iN __atomic_load_N(iN *ptr, int ordering)
1918 // void __atomic_store_N(iN *ptr, iN val, int ordering)
1919 // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
1920 // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
1921 // int success_order, int failure_order)
1922 //
1923 // Note that these functions can be used for non-integer atomic
1924 // operations, the values just need to be bitcast to integers on the
1925 // way in and out.
1926 //
1927 // And, then, the generic variants. They look like the following:
1928 // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
1929 // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
1930 // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
1931 // int ordering)
1932 // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
1933 // void *desired, int success_order,
1934 // int failure_order)
1935 //
1936 // The different signatures are built up depending on the
1937 // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
1938 // variables.
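  // Illustrative sketch only: an "atomicrmw xchg ptr %p, i64 %v seq_cst" that
  // passes canUseSizedAtomicCall becomes roughly
  //   %r = call i64 @__atomic_exchange_8(ptr %p, i64 %v, i32 5)
  // where 5 is the C ABI encoding of seq_cst produced by toCABI, whereas the
  // unsized path spills %v to an alloca and calls the generic __atomic_exchange
  // with an extra size_t argument and a 'ret' output pointer.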
1939
1940 AllocaInst *AllocaCASExpected = nullptr;
1941 AllocaInst *AllocaValue = nullptr;
1942 AllocaInst *AllocaResult = nullptr;
1943
1944 Type *ResultTy;
1945 SmallVector<Value *, 6> Args;
1946 AttributeList Attr;
1947
1948 // 'size' argument.
1949 if (!UseSizedLibcall) {
1950 // Note, getIntPtrType is assumed equivalent to size_t.
1951 Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
1952 }
1953
1954 // 'ptr' argument.
1955 // note: This assumes all address spaces share a common libfunc
1956 // implementation and that addresses are convertible. For systems without
1957 // that property, we'd need to extend this mechanism to support AS-specific
1958 // families of atomic intrinsics.
1959 Value *PtrVal = PointerOperand;
1960 PtrVal = Builder.CreateAddrSpaceCast(PtrVal, PointerType::getUnqual(Ctx));
1961 Args.push_back(PtrVal);
1962
1963 // 'expected' argument, if present.
1964 if (CASExpected) {
1965 AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
1966 AllocaCASExpected->setAlignment(AllocaAlignment);
1967 Builder.CreateLifetimeStart(AllocaCASExpected, SizeVal64);
1968 Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
1969 Args.push_back(AllocaCASExpected);
1970 }
1971
1972 // 'val' argument ('desired' for cas), if present.
1973 if (ValueOperand) {
1974 if (UseSizedLibcall) {
1975 Value *IntValue =
1976 Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
1977 Args.push_back(IntValue);
1978 } else {
1979 AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
1980 AllocaValue->setAlignment(AllocaAlignment);
1981 Builder.CreateLifetimeStart(AllocaValue, SizeVal64);
1982 Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
1983 Args.push_back(AllocaValue);
1984 }
1985 }
1986
1987 // 'ret' argument.
1988 if (!CASExpected && HasResult && !UseSizedLibcall) {
1989 AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
1990 AllocaResult->setAlignment(AllocaAlignment);
1991 Builder.CreateLifetimeStart(AllocaResult, SizeVal64);
1992 Args.push_back(AllocaResult);
1993 }
1994
1995 // 'ordering' ('success_order' for cas) argument.
1996 Args.push_back(OrderingVal);
1997
1998 // 'failure_order' argument, if present.
1999 if (Ordering2Val)
2000 Args.push_back(Ordering2Val);
2001
2002 // Now, the return type.
2003 if (CASExpected) {
2004 ResultTy = Type::getInt1Ty(Ctx);
2005 Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
2006 } else if (HasResult && UseSizedLibcall)
2007 ResultTy = SizedIntTy;
2008 else
2009 ResultTy = Type::getVoidTy(Ctx);
2010
2011 // Done with setting up arguments and return types, create the call:
2012 SmallVector<Type *, 6> ArgTys;
2013 for (Value *Arg : Args)
2014 ArgTys.push_back(Arg->getType());
2015 FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
2016 FunctionCallee LibcallFn =
2017 M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
2018 CallInst *Call = Builder.CreateCall(LibcallFn, Args);
2019 Call->setAttributes(Attr);
2020 Value *Result = Call;
2021
2022 // And then, extract the results...
2023 if (ValueOperand && !UseSizedLibcall)
2024 Builder.CreateLifetimeEnd(AllocaValue, SizeVal64);
2025
2026 if (CASExpected) {
2027 // The final result from the CAS is {load of 'expected' alloca, bool result
2028 // from call}
2029 Type *FinalResultTy = I->getType();
2030 Value *V = PoisonValue::get(FinalResultTy);
2031 Value *ExpectedOut = Builder.CreateAlignedLoad(
2032 CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
2033 Builder.CreateLifetimeEnd(AllocaCASExpected, SizeVal64);
2034 V = Builder.CreateInsertValue(V, ExpectedOut, 0);
2035 V = Builder.CreateInsertValue(V, Result, 1);
2036 I->replaceAllUsesWith(V);
2037 } else if (HasResult) {
2038 Value *V;
2039 if (UseSizedLibcall)
2040 V = Builder.CreateBitOrPointerCast(Result, I->getType());
2041 else {
2042 V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
2043 AllocaAlignment);
2044 Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);
2045 }
2046 I->replaceAllUsesWith(V);
2047 }
2048 I->eraseFromParent();
2049 return true;
2050}
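// For a cmpxchg routed through the generic path, the resulting call looks
// roughly like
//   %ok = call zeroext i1 @__atomic_compare_exchange(i64 %size, ptr %p,
//             ptr %expected.alloca, ptr %desired.alloca, i32 %succ, i32 %fail)
// after which the loaded value is re-read from the 'expected' alloca and the
// { iN, i1 } result of the original instruction is reassembled from that load
// and %ok, exactly as done above.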