1//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass (at IR level) to replace atomic instructions with
10// __atomic_* library calls, or target specific instructions which implement the
11// same semantics in a way which better fits the target backend. This can
12// include the use of (intrinsic-based) load-linked/store-conditional loops,
13// AtomicCmpXchg, or type coercions.
14//
15//===----------------------------------------------------------------------===//
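//
// Illustrative sketch only (what actually gets emitted depends entirely on the
// target's TargetLowering hooks): an atomicrmw the target cannot lower
// directly, such as
//
//   %old = atomicrmw add ptr %addr, i32 %incr seq_cst
//
// may be rewritten by this pass into a compare-and-swap loop of roughly this
// shape (see insertRMWCmpXchgLoop below for the exact form produced):
//
//   %init = load i32, ptr %addr, align 4
//   br label %atomicrmw.start
// atomicrmw.start:
//   %loaded = phi i32 [ %init, %entry ], [ %new.loaded, %atomicrmw.start ]
//   %new = add i32 %loaded, %incr
//   %pair = cmpxchg ptr %addr, i32 %loaded, i32 %new seq_cst seq_cst
//   %new.loaded = extractvalue { i32, i1 } %pair, 0
//   %success = extractvalue { i32, i1 } %pair, 1
//   br i1 %success, label %atomicrmw.end, label %atomicrmw.start
// atomicrmw.end: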
16
17#include "llvm/ADT/ArrayRef.h"
28#include "llvm/IR/Attributes.h"
29#include "llvm/IR/BasicBlock.h"
30#include "llvm/IR/Constant.h"
31#include "llvm/IR/Constants.h"
32#include "llvm/IR/DataLayout.h"
34#include "llvm/IR/Function.h"
35#include "llvm/IR/IRBuilder.h"
36#include "llvm/IR/Instruction.h"
38#include "llvm/IR/MDBuilder.h"
40#include "llvm/IR/Module.h"
41#include "llvm/IR/Type.h"
42#include "llvm/IR/User.h"
43#include "llvm/IR/Value.h"
45#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
53#include <cassert>
54#include <cstdint>
55#include <iterator>
56
57using namespace llvm;
58
59#define DEBUG_TYPE "atomic-expand"
60
61namespace {
62
63class AtomicExpandImpl {
64 const TargetLowering *TLI = nullptr;
65 const DataLayout *DL = nullptr;
66
67private:
68 bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
69 IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
70 LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
71 bool tryExpandAtomicLoad(LoadInst *LI);
72 bool expandAtomicLoadToLL(LoadInst *LI);
73 bool expandAtomicLoadToCmpXchg(LoadInst *LI);
74 StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
75 bool tryExpandAtomicStore(StoreInst *SI);
76 void expandAtomicStore(StoreInst *SI);
77 bool tryExpandAtomicRMW(AtomicRMWInst *AI);
78 AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
79 Value *
80 insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
81 Align AddrAlign, AtomicOrdering MemOpOrder,
82 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
83 void expandAtomicOpToLLSC(
84 Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
85 AtomicOrdering MemOpOrder,
86 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
87 void expandPartwordAtomicRMW(
88 AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind);
89 AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
90 bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
91 void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
92 void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
93
94 AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
95 static Value *insertRMWCmpXchgLoop(
96 IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
97 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
98 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
99 CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc);
100 bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
101
102 bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
103 bool isIdempotentRMW(AtomicRMWInst *RMWI);
104 bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
105
106 bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
107 Value *PointerOperand, Value *ValueOperand,
108 Value *CASExpected, AtomicOrdering Ordering,
109 AtomicOrdering Ordering2,
110 ArrayRef<RTLIB::Libcall> Libcalls);
111 void expandAtomicLoadToLibcall(LoadInst *LI);
112 void expandAtomicStoreToLibcall(StoreInst *LI);
113 void expandAtomicRMWToLibcall(AtomicRMWInst *I);
114 void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
115
116 friend bool
117 llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
118 CreateCmpXchgInstFun CreateCmpXchg);
119
120 bool processAtomicInstr(Instruction *I);
121
122public:
123 bool run(Function &F, const TargetMachine *TM);
124};
125
126class AtomicExpandLegacy : public FunctionPass {
127public:
128 static char ID; // Pass identification, replacement for typeid
129
130 AtomicExpandLegacy() : FunctionPass(ID) {
131 initializeAtomicExpandLegacyPass(*PassRegistry::getPassRegistry());
132 }
133
134 bool runOnFunction(Function &F) override;
135};
136
137// IRBuilder to be used for replacement atomic instructions.
138struct ReplacementIRBuilder
139 : IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> {
140 MDNode *MMRAMD = nullptr;
141
142 // Preserves the DebugLoc from I, and preserves still valid metadata.
143 // Enable StrictFP builder mode when appropriate.
144 explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
145 : IRBuilder(I->getContext(), InstSimplifyFolder(DL),
146 IRBuilderCallbackInserter(
147 [this](Instruction *I) { addMMRAMD(I); })) {
148 SetInsertPoint(I);
149 this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
150 if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
151 this->setIsFPConstrained(true);
152
153 MMRAMD = I->getMetadata(LLVMContext::MD_mmra);
154 }
155
156 void addMMRAMD(Instruction *I) {
157 if (canInstructionHaveMMRAs(*I))
158 I->setMetadata(LLVMContext::MD_mmra, MMRAMD);
159 }
160};
161
162} // end anonymous namespace
163
164char AtomicExpandLegacy::ID = 0;
165
166char &llvm::AtomicExpandID = AtomicExpandLegacy::ID;
167
168 INITIALIZE_PASS_BEGIN(AtomicExpandLegacy, DEBUG_TYPE,
169 "Expand Atomic instructions", false, false)
170 INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
171 INITIALIZE_PASS_END(AtomicExpandLegacy, DEBUG_TYPE,
172 "Expand Atomic instructions", false, false)
173
174// Helper functions to retrieve the size of atomic instructions.
175static unsigned getAtomicOpSize(LoadInst *LI) {
176 const DataLayout &DL = LI->getDataLayout();
177 return DL.getTypeStoreSize(LI->getType());
178}
179
180static unsigned getAtomicOpSize(StoreInst *SI) {
181 const DataLayout &DL = SI->getDataLayout();
182 return DL.getTypeStoreSize(SI->getValueOperand()->getType());
183}
184
185static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
186 const DataLayout &DL = RMWI->getDataLayout();
187 return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
188}
189
190static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
191 const DataLayout &DL = CASI->getDataLayout();
192 return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
193}
194
195/// Copy metadata that's safe to preserve when widening atomics.
196 static void copyMetadataForAtomic(Instruction &Dest,
197 const Instruction &Source) {
198 SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
199 Source.getAllMetadata(MD);
200 LLVMContext &Ctx = Dest.getContext();
201 MDBuilder MDB(Ctx);
202
203 for (auto [ID, N] : MD) {
204 switch (ID) {
205 case LLVMContext::MD_dbg:
206 case LLVMContext::MD_tbaa:
207 case LLVMContext::MD_tbaa_struct:
208 case LLVMContext::MD_alias_scope:
209 case LLVMContext::MD_noalias:
210 case LLVMContext::MD_noalias_addrspace:
211 case LLVMContext::MD_access_group:
212 case LLVMContext::MD_mmra:
213 Dest.setMetadata(ID, N);
214 break;
215 default:
216 if (ID == Ctx.getMDKindID("amdgpu.no.remote.memory"))
217 Dest.setMetadata(ID, N);
218 else if (ID == Ctx.getMDKindID("amdgpu.no.fine.grained.memory"))
219 Dest.setMetadata(ID, N);
220
221 // Losing amdgpu.ignore.denormal.mode, but it doesn't matter for current
222 // uses.
223 break;
224 }
225 }
226}
227
228// Determine if a particular atomic operation has a supported size,
229// and is of appropriate alignment, to be passed through for target
230// lowering. (Versus turning into a __atomic libcall)
231template <typename Inst>
232static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
233 unsigned Size = getAtomicOpSize(I);
234 Align Alignment = I->getAlign();
235 return Alignment >= Size &&
236 Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
237}
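// Example of the check above (a sketch; the 64-bit limit is an assumption, not
// taken from any particular target): with getMaxAtomicSizeInBitsSupported()
// returning 64, a "load atomic i64, ptr %p monotonic, align 4" fails the
// Alignment >= Size test (4 < 8) and is expanded to an __atomic_load libcall,
// while the same load with align 8 is left for target lowering.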
238
239bool AtomicExpandImpl::processAtomicInstr(Instruction *I) {
240 auto *LI = dyn_cast<LoadInst>(I);
241 auto *SI = dyn_cast<StoreInst>(I);
242 auto *RMWI = dyn_cast<AtomicRMWInst>(I);
243 auto *CASI = dyn_cast<AtomicCmpXchgInst>(I);
244
245 bool MadeChange = false;
246
247 // If the Size/Alignment is not supported, replace with a libcall.
248 if (LI) {
249 if (!LI->isAtomic())
250 return false;
251
252 if (!atomicSizeSupported(TLI, LI)) {
253 expandAtomicLoadToLibcall(LI);
254 return true;
255 }
256
257 if (TLI->shouldCastAtomicLoadInIR(LI) ==
258 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
259 I = LI = convertAtomicLoadToIntegerType(LI);
260 MadeChange = true;
261 }
262 } else if (SI) {
263 if (!SI->isAtomic())
264 return false;
265
266 if (!atomicSizeSupported(TLI, SI)) {
267 expandAtomicStoreToLibcall(SI);
268 return true;
269 }
270
271 if (TLI->shouldCastAtomicStoreInIR(SI) ==
272 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
273 I = SI = convertAtomicStoreToIntegerType(SI);
274 MadeChange = true;
275 }
276 } else if (RMWI) {
277 if (!atomicSizeSupported(TLI, RMWI)) {
278 expandAtomicRMWToLibcall(RMWI);
279 return true;
280 }
281
282 if (TLI->shouldCastAtomicRMWIInIR(RMWI) ==
283 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
284 I = RMWI = convertAtomicXchgToIntegerType(RMWI);
285 MadeChange = true;
286 }
287 } else if (CASI) {
288 if (!atomicSizeSupported(TLI, CASI)) {
289 expandAtomicCASToLibcall(CASI);
290 return true;
291 }
292
293 // TODO: when we're ready to make the change at the IR level, we can
294 // extend convertCmpXchgToInteger for floating point too.
295 if (CASI->getCompareOperand()->getType()->isPointerTy()) {
296 // TODO: add a TLI hook to control this so that each target can
297 // convert to lowering the original type one at a time.
298 I = CASI = convertCmpXchgToIntegerType(CASI);
299 MadeChange = true;
300 }
301 } else
302 return false;
303
304 if (TLI->shouldInsertFencesForAtomic(I)) {
305 auto FenceOrdering = AtomicOrdering::Monotonic;
306 if (LI && isAcquireOrStronger(LI->getOrdering())) {
307 FenceOrdering = LI->getOrdering();
308 LI->setOrdering(AtomicOrdering::Monotonic);
309 } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
310 FenceOrdering = SI->getOrdering();
311 SI->setOrdering(AtomicOrdering::Monotonic);
312 } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
313 isAcquireOrStronger(RMWI->getOrdering()))) {
314 FenceOrdering = RMWI->getOrdering();
315 RMWI->setOrdering(AtomicOrdering::Monotonic);
316 } else if (CASI &&
317 TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
318 TargetLoweringBase::AtomicExpansionKind::None &&
319 (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
320 isAcquireOrStronger(CASI->getSuccessOrdering()) ||
321 isAcquireOrStronger(CASI->getFailureOrdering()))) {
322 // If a compare and swap is lowered to LL/SC, we can do smarter fence
323 // insertion, with a stronger one on the success path than on the
324 // failure path. As a result, fence insertion is directly done by
325 // expandAtomicCmpXchg in that case.
326 FenceOrdering = CASI->getMergedOrdering();
327 CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
328 CASI->setFailureOrdering(AtomicOrdering::Monotonic);
329 }
330
331 if (FenceOrdering != AtomicOrdering::Monotonic) {
332 MadeChange |= bracketInstWithFences(I, FenceOrdering);
333 }
334 } else if (I->hasAtomicStore() &&
335 TLI->shouldInsertTrailingFenceForAtomicStore(I)) {
336 auto FenceOrdering = AtomicOrdering::Monotonic;
337 if (SI)
338 FenceOrdering = SI->getOrdering();
339 else if (RMWI)
340 FenceOrdering = RMWI->getOrdering();
341 else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) !=
342 TargetLoweringBase::AtomicExpansionKind::LLSC)
343 // LLSC is handled in expandAtomicCmpXchg().
344 FenceOrdering = CASI->getSuccessOrdering();
345
346 IRBuilder Builder(I);
347 if (auto TrailingFence =
348 TLI->emitTrailingFence(Builder, I, FenceOrdering)) {
349 TrailingFence->moveAfter(I);
350 MadeChange = true;
351 }
352 }
353
354 if (LI)
355 MadeChange |= tryExpandAtomicLoad(LI);
356 else if (SI)
357 MadeChange |= tryExpandAtomicStore(SI);
358 else if (RMWI) {
359 // There are two different ways of expanding RMW instructions:
360 // - into a load if it is idempotent
361 // - into a Cmpxchg/LL-SC loop otherwise
362 // we try them in that order.
363
364 if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
365 MadeChange = true;
366
367 } else {
368 MadeChange |= tryExpandAtomicRMW(RMWI);
369 }
370 } else if (CASI)
371 MadeChange |= tryExpandAtomicCmpXchg(CASI);
372
373 return MadeChange;
374}
375
376bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM) {
377 const auto *Subtarget = TM->getSubtargetImpl(F);
378 if (!Subtarget->enableAtomicExpand())
379 return false;
380 TLI = Subtarget->getTargetLowering();
381 DL = &F.getDataLayout();
382
383 bool MadeChange = false;
384
385 for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
386 BasicBlock *BB = &*BBI;
387
388 BasicBlock::reverse_iterator Next;
389
390 for (BasicBlock::reverse_iterator I = BB->rbegin(), E = BB->rend(); I != E;
391 I = Next) {
392 Instruction &Inst = *I;
393 Next = std::next(I);
394
395 if (processAtomicInstr(&Inst)) {
396 MadeChange = true;
397
398 // New blocks may have been inserted.
399 BBE = F.end();
400 }
401 }
402 }
403
404 return MadeChange;
405}
406
407bool AtomicExpandLegacy::runOnFunction(Function &F) {
408
409 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
410 if (!TPC)
411 return false;
412 auto *TM = &TPC->getTM<TargetMachine>();
413 AtomicExpandImpl AE;
414 return AE.run(F, TM);
415}
416
417 FunctionPass *llvm::createAtomicExpandLegacyPass() {
418 return new AtomicExpandLegacy();
419}
420
421 PreservedAnalyses AtomicExpandPass::run(Function &F,
422 FunctionAnalysisManager &AM) {
423 AtomicExpandImpl AE;
424
425 bool Changed = AE.run(F, TM);
426 if (!Changed)
427 return PreservedAnalyses::all();
428
430}
431
432bool AtomicExpandImpl::bracketInstWithFences(Instruction *I,
433 AtomicOrdering Order) {
434 ReplacementIRBuilder Builder(I, *DL);
435
436 auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
437
438 auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
439 // We have a guard here because not every atomic operation generates a
440 // trailing fence.
441 if (TrailingFence)
442 TrailingFence->moveAfter(I);
443
444 return (LeadingFence || TrailingFence);
445}
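// Sketch of the bracketing on a target where shouldInsertFencesForAtomic()
// returns true (the caller has already downgraded the access to monotonic):
//
//   store atomic i32 %v, ptr %p release, align 4
//
// becomes approximately
//
//   fence release                                   ; emitLeadingFence
//   store atomic i32 %v, ptr %p monotonic, align 4
//
// with no trailing fence, since emitTrailingFence commonly returns nullptr for
// a release-only store.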
446
447/// Get the iX type with the same bitwidth as T.
448 IntegerType *
449 AtomicExpandImpl::getCorrespondingIntegerType(Type *T, const DataLayout &DL) {
450 EVT VT = TLI->getMemValueType(DL, T);
451 unsigned BitWidth = VT.getStoreSizeInBits();
452 assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
453 return IntegerType::get(T->getContext(), BitWidth);
454}
455
456/// Convert an atomic load of a non-integral type to an integer load of the
457/// equivalent bitwidth. See the function comment on
458/// convertAtomicStoreToIntegerType for background.
459LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) {
460 auto *M = LI->getModule();
461 Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());
462
463 ReplacementIRBuilder Builder(LI, *DL);
464
465 Value *Addr = LI->getPointerOperand();
466
467 auto *NewLI = Builder.CreateLoad(NewTy, Addr);
468 NewLI->setAlignment(LI->getAlign());
469 NewLI->setVolatile(LI->isVolatile());
470 NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
471 LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
472
473 Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
474 LI->replaceAllUsesWith(NewVal);
475 LI->eraseFromParent();
476 return NewLI;
477}
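// Sketch of the conversion above (types illustrative):
//
//   %v = load atomic float, ptr %p seq_cst, align 4
//
// becomes
//
//   %v.int = load atomic i32, ptr %p seq_cst, align 4
//   %v     = bitcast i32 %v.int to float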
478
479 AtomicRMWInst *
480 AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
481 assert(RMWI->getOperation() == AtomicRMWInst::Xchg && "Expected xchg op");
482
483 auto *M = RMWI->getModule();
484 Type *NewTy =
485 getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());
486
487 ReplacementIRBuilder Builder(RMWI, *DL);
488
489 Value *Addr = RMWI->getPointerOperand();
490 Value *Val = RMWI->getValOperand();
491 Value *NewVal = Val->getType()->isPointerTy()
492 ? Builder.CreatePtrToInt(Val, NewTy)
493 : Builder.CreateBitCast(Val, NewTy);
494
495 auto *NewRMWI = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Addr, NewVal,
496 RMWI->getAlign(), RMWI->getOrdering(),
497 RMWI->getSyncScopeID());
498 NewRMWI->setVolatile(RMWI->isVolatile());
499 copyMetadataForAtomic(*NewRMWI, *RMWI);
500 LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
501
502 Value *NewRVal = RMWI->getType()->isPointerTy()
503 ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
504 : Builder.CreateBitCast(NewRMWI, RMWI->getType());
505 RMWI->replaceAllUsesWith(NewRVal);
506 RMWI->eraseFromParent();
507 return NewRMWI;
508}
509
510bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
511 switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
512 case TargetLoweringBase::AtomicExpansionKind::None:
513 return false;
514 case TargetLoweringBase::AtomicExpansionKind::LLSC:
515 expandAtomicOpToLLSC(
516 LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
517 LI->getOrdering(),
518 [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
519 return true;
520 case TargetLoweringBase::AtomicExpansionKind::LLOnly:
521 return expandAtomicLoadToLL(LI);
522 case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
523 return expandAtomicLoadToCmpXchg(LI);
524 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
525 LI->setAtomic(AtomicOrdering::NotAtomic);
526 return true;
527 default:
528 llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
529 }
530}
531
532bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
533 switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
534 case TargetLoweringBase::AtomicExpansionKind::None:
535 return false;
536 case TargetLoweringBase::AtomicExpansionKind::Expand:
537 expandAtomicStore(SI);
538 return true;
539 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
540 SI->setAtomic(AtomicOrdering::NotAtomic);
541 return true;
542 default:
543 llvm_unreachable("Unhandled case in tryExpandAtomicStore");
544 }
545}
546
547bool AtomicExpandImpl::expandAtomicLoadToLL(LoadInst *LI) {
548 ReplacementIRBuilder Builder(LI, *DL);
549
550 // On some architectures, load-linked instructions are atomic for larger
551 // sizes than normal loads. For example, the only 64-bit load guaranteed
552 // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
553 Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
554 LI->getPointerOperand(), LI->getOrdering());
555 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
556
557 LI->replaceAllUsesWith(Val);
558 LI->eraseFromParent();
559
560 return true;
561}
562
563bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
564 ReplacementIRBuilder Builder(LI, *DL);
565 AtomicOrdering Order = LI->getOrdering();
566 if (Order == AtomicOrdering::Unordered)
567 Order = AtomicOrdering::Monotonic;
568
569 Value *Addr = LI->getPointerOperand();
570 Type *Ty = LI->getType();
571 Constant *DummyVal = Constant::getNullValue(Ty);
572
573 Value *Pair = Builder.CreateAtomicCmpXchg(
574 Addr, DummyVal, DummyVal, LI->getAlign(), Order,
575 AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
576 Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
577
578 LI->replaceAllUsesWith(Loaded);
579 LI->eraseFromParent();
580
581 return true;
582}
583
584/// Convert an atomic store of a non-integral type to an integer store of the
585/// equivalent bitwidth. We used to not support floating point or vector
586/// atomics in the IR at all. The backends learned to deal with the bitcast
587/// idiom because that was the only way of expressing the notion of a atomic
588/// float or vector store. The long term plan is to teach each backend to
589/// instruction select from the original atomic store, but as a migration
590/// mechanism, we convert back to the old format which the backends understand.
591/// Each backend will need individual work to recognize the new format.
592StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
593 ReplacementIRBuilder Builder(SI, *DL);
594 auto *M = SI->getModule();
595 Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
596 M->getDataLayout());
597 Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
598
599 Value *Addr = SI->getPointerOperand();
600
601 StoreInst *NewSI = Builder.CreateStore(NewVal, Addr);
602 NewSI->setAlignment(SI->getAlign());
603 NewSI->setVolatile(SI->isVolatile());
604 NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
605 LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
606 SI->eraseFromParent();
607 return NewSI;
608}
609
610void AtomicExpandImpl::expandAtomicStore(StoreInst *SI) {
611 // This function is only called on atomic stores that are too large to be
612 // atomic if implemented as a native store. So we replace them by an
613 // atomic swap, that can be implemented for example as a ldrex/strex on ARM
614 // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
615 // It is the responsibility of the target to only signal expansion via
616 // shouldExpandAtomicRMW in cases where this is required and possible.
617 ReplacementIRBuilder Builder(SI, *DL);
618 AtomicOrdering Ordering = SI->getOrdering();
619 assert(Ordering != AtomicOrdering::NotAtomic);
620 AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
621 ? AtomicOrdering::Monotonic
622 : Ordering;
623 AtomicRMWInst *AI = Builder.CreateAtomicRMW(
624 AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
625 SI->getAlign(), RMWOrdering);
626 SI->eraseFromParent();
627
628 // Now we have an appropriate swap instruction, lower it as usual.
629 tryExpandAtomicRMW(AI);
630}
631
632 void llvm::createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
633 Value *Loaded, Value *NewVal, Align AddrAlign,
634 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
635 Value *&Success, Value *&NewLoaded,
636 Instruction *MetadataSrc) {
637 Type *OrigTy = NewVal->getType();
638
639 // This code can go away when cmpxchg supports FP and vector types.
640 assert(!OrigTy->isPointerTy());
641 bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isVectorTy();
642 if (NeedBitcast) {
643 IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
644 NewVal = Builder.CreateBitCast(NewVal, IntTy);
645 Loaded = Builder.CreateBitCast(Loaded, IntTy);
646 }
647
648 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
649 Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
650 AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
651 if (MetadataSrc)
652 copyMetadataForAtomic(*Pair, *MetadataSrc);
653
654 Success = Builder.CreateExtractValue(Pair, 1, "success");
655 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
656
657 if (NeedBitcast)
658 NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
659}
660
661bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
662 LLVMContext &Ctx = AI->getModule()->getContext();
663 TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
664 switch (Kind) {
665 case TargetLoweringBase::AtomicExpansionKind::None:
666 return false;
667 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
668 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
669 unsigned ValueSize = getAtomicOpSize(AI);
670 if (ValueSize < MinCASSize) {
671 expandPartwordAtomicRMW(AI,
672 TargetLoweringBase::AtomicExpansionKind::LLSC);
673 } else {
674 auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
675 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
676 AI->getValOperand());
677 };
678 expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
679 AI->getAlign(), AI->getOrdering(), PerformOp);
680 }
681 return true;
682 }
683 case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
684 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
685 unsigned ValueSize = getAtomicOpSize(AI);
686 if (ValueSize < MinCASSize) {
687 expandPartwordAtomicRMW(AI,
688 TargetLoweringBase::AtomicExpansionKind::CmpXChg);
689 } else {
690 SmallVector<StringRef> SSNs;
691 Ctx.getSyncScopeNames(SSNs);
692 auto MemScope = SSNs[AI->getSyncScopeID()].empty()
693 ? "system"
694 : SSNs[AI->getSyncScopeID()];
695 OptimizationRemarkEmitter ORE(AI->getFunction());
696 ORE.emit([&]() {
697 return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
698 << "A compare and swap loop was generated for an atomic "
699 << AI->getOperationName(AI->getOperation()) << " operation at "
700 << MemScope << " memory scope";
701 });
702 expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
703 }
704 return true;
705 }
706 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
707 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
708 unsigned ValueSize = getAtomicOpSize(AI);
709 if (ValueSize < MinCASSize) {
710 AtomicRMWInst::BinOp Op = AI->getOperation();
711 // Widen And/Or/Xor and give the target another chance at expanding it.
712 if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
713 Op == AtomicRMWInst::And) {
714 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
715 return true;
716 }
717 }
718 expandAtomicRMWToMaskedIntrinsic(AI);
719 return true;
720 }
721 case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
722 TLI->emitBitTestAtomicRMWIntrinsic(AI);
723 return true;
724 }
725 case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: {
726 TLI->emitCmpArithAtomicRMWIntrinsic(AI);
727 return true;
728 }
729 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
730 return lowerAtomicRMWInst(AI);
731 case TargetLoweringBase::AtomicExpansionKind::Expand:
732 TLI->emitExpandAtomicRMW(AI);
733 return true;
734 default:
735 llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
736 }
737}
738
739namespace {
740
741struct PartwordMaskValues {
742 // These three fields are guaranteed to be set by createMaskInstrs.
743 Type *WordType = nullptr;
744 Type *ValueType = nullptr;
745 Type *IntValueType = nullptr;
746 Value *AlignedAddr = nullptr;
747 Align AlignedAddrAlignment;
748 // The remaining fields can be null.
749 Value *ShiftAmt = nullptr;
750 Value *Mask = nullptr;
751 Value *Inv_Mask = nullptr;
752};
753
754 LLVM_ATTRIBUTE_UNUSED
755 raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
756 auto PrintObj = [&O](auto *V) {
757 if (V)
758 O << *V;
759 else
760 O << "nullptr";
761 O << '\n';
762 };
763 O << "PartwordMaskValues {\n";
764 O << " WordType: ";
765 PrintObj(PMV.WordType);
766 O << " ValueType: ";
767 PrintObj(PMV.ValueType);
768 O << " AlignedAddr: ";
769 PrintObj(PMV.AlignedAddr);
770 O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
771 O << " ShiftAmt: ";
772 PrintObj(PMV.ShiftAmt);
773 O << " Mask: ";
774 PrintObj(PMV.Mask);
775 O << " Inv_Mask: ";
776 PrintObj(PMV.Inv_Mask);
777 O << "}\n";
778 return O;
779}
780
781} // end anonymous namespace
782
783/// This is a helper function which builds instructions to provide
784/// values necessary for partword atomic operations. It takes an
785/// incoming address, Addr, and ValueType, and constructs the address,
786/// shift-amounts and masks needed to work with a larger value of size
787/// WordSize.
788///
789/// AlignedAddr: Addr rounded down to a multiple of WordSize
790///
791/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
792/// from AlignAddr for it to have the same value as if
793/// ValueType was loaded from Addr.
794///
795/// Mask: Value to mask with the value loaded from AlignAddr to
796/// include only the part that would've been loaded from Addr.
797///
798/// Inv_Mask: The inverse of Mask.
799 static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
800 Instruction *I, Type *ValueType,
801 Value *Addr, Align AddrAlign,
802 unsigned MinWordSize) {
803 PartwordMaskValues PMV;
804
805 Module *M = I->getModule();
806 LLVMContext &Ctx = M->getContext();
807 const DataLayout &DL = M->getDataLayout();
808 unsigned ValueSize = DL.getTypeStoreSize(ValueType);
809
810 PMV.ValueType = PMV.IntValueType = ValueType;
811 if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy())
812 PMV.IntValueType =
813 Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());
814
815 PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
816 : ValueType;
817 if (PMV.ValueType == PMV.WordType) {
818 PMV.AlignedAddr = Addr;
819 PMV.AlignedAddrAlignment = AddrAlign;
820 PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
821 PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
822 return PMV;
823 }
824
825 PMV.AlignedAddrAlignment = Align(MinWordSize);
826
827 assert(ValueSize < MinWordSize);
828
829 PointerType *PtrTy = cast<PointerType>(Addr->getType());
830 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
831 Value *PtrLSB;
832
833 if (AddrAlign < MinWordSize) {
834 PMV.AlignedAddr = Builder.CreateIntrinsic(
835 Intrinsic::ptrmask, {PtrTy, IntTy},
836 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
837 "AlignedAddr");
838
839 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
840 PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
841 } else {
842 // If the alignment is high enough, the LSB are known 0.
843 PMV.AlignedAddr = Addr;
844 PtrLSB = ConstantInt::getNullValue(IntTy);
845 }
846
847 if (DL.isLittleEndian()) {
848 // turn bytes into bits
849 PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
850 } else {
851 // turn bytes into bits, and count from the other side.
852 PMV.ShiftAmt = Builder.CreateShl(
853 Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
854 }
855
856 PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
857 PMV.Mask = Builder.CreateShl(
858 ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
859 "Mask");
860
861 PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
862
863 return PMV;
864}
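// Worked example of the values computed above (assuming a little-endian target
// with a 32-bit minimum cmpxchg width and an i8 access at AlignedAddr + 1):
//
//   AlignedAddr = llvm.ptrmask(Addr, ~3)   ; rounded down to the 4-byte word
//   PtrLSB      = 1
//   ShiftAmt    = 8                        ; bits, not bytes
//   Mask        = 0x0000FF00
//   Inv_Mask    = 0xFFFF00FF
//
// i.e. the i8 value occupies bits [8, 16) of the containing 32-bit word.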
865
866static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
867 const PartwordMaskValues &PMV) {
868 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
869 if (PMV.WordType == PMV.ValueType)
870 return WideWord;
871
872 Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
873 Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
874 return Builder.CreateBitCast(Trunc, PMV.ValueType);
875}
876
877static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
878 Value *Updated, const PartwordMaskValues &PMV) {
879 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
880 assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
881 if (PMV.WordType == PMV.ValueType)
882 return Updated;
883
884 Updated = Builder.CreateBitCast(Updated, PMV.IntValueType);
885
886 Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
887 Value *Shift =
888 Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
889 Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
890 Value *Or = Builder.CreateOr(And, Shift, "inserted");
891 return Or;
892}
893
894/// Emit IR to implement a masked version of a given atomicrmw
895/// operation. (That is, only the bits under the Mask should be
896/// affected by the operation)
897 static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
898 IRBuilderBase &Builder, Value *Loaded,
899 Value *Shifted_Inc, Value *Inc,
900 const PartwordMaskValues &PMV) {
901 // TODO: update to use
902 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
903 // to merge bits from two values without requiring PMV.Inv_Mask.
904 switch (Op) {
905 case AtomicRMWInst::Xchg: {
906 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
907 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
908 return FinalVal;
909 }
910 case AtomicRMWInst::Or:
911 case AtomicRMWInst::Xor:
912 case AtomicRMWInst::And:
913 llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
914 case AtomicRMWInst::Add:
915 case AtomicRMWInst::Sub:
916 case AtomicRMWInst::Nand: {
917 // The other arithmetic ops need to be masked into place.
918 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
919 Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
920 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
921 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
922 return FinalVal;
923 }
924 case AtomicRMWInst::Max:
925 case AtomicRMWInst::Min:
926 case AtomicRMWInst::UMax:
927 case AtomicRMWInst::UMin:
928 case AtomicRMWInst::FAdd:
929 case AtomicRMWInst::FSub:
930 case AtomicRMWInst::FMin:
931 case AtomicRMWInst::FMax:
932 case AtomicRMWInst::UIncWrap:
933 case AtomicRMWInst::UDecWrap:
934 case AtomicRMWInst::USubCond:
935 case AtomicRMWInst::USubSat: {
936 // Finally, other ops will operate on the full value, so truncate down to
937 // the original size, and expand out again after doing the
938 // operation. Bitcasts will be inserted for FP values.
939 Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
940 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
941 Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
942 return FinalVal;
943 }
944 default:
945 llvm_unreachable("Unknown atomic op");
946 }
947}
948
949/// Expand a sub-word atomicrmw operation into an appropriate
950/// word-sized operation.
951///
952/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
953/// way as a typical atomicrmw expansion. The only difference here is
954/// that the operation inside of the loop may operate upon only a
955/// part of the value.
956 void AtomicExpandImpl::expandPartwordAtomicRMW(
957 AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
958 // Widen And/Or/Xor and give the target another chance at expanding it.
959 AtomicRMWInst::BinOp Op = AI->getOperation();
960 if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
961 Op == AtomicRMWInst::And) {
962 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
963 return;
964 }
965 AtomicOrdering MemOpOrder = AI->getOrdering();
966 SyncScope::ID SSID = AI->getSyncScopeID();
967
968 ReplacementIRBuilder Builder(AI, *DL);
969
970 PartwordMaskValues PMV =
971 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
972 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
973
974 Value *ValOperand_Shifted = nullptr;
975 if (Op == AtomicRMWInst::Xchg || Op == AtomicRMWInst::Add ||
976 Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Nand) {
977 Value *ValOp = Builder.CreateBitCast(AI->getValOperand(), PMV.IntValueType);
978 ValOperand_Shifted =
979 Builder.CreateShl(Builder.CreateZExt(ValOp, PMV.WordType), PMV.ShiftAmt,
980 "ValOperand_Shifted");
981 }
982
983 auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
984 return performMaskedAtomicOp(Op, Builder, Loaded, ValOperand_Shifted,
985 AI->getValOperand(), PMV);
986 };
987
988 Value *OldResult;
989 if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
990 OldResult = insertRMWCmpXchgLoop(
991 Builder, PMV.WordType, PMV.AlignedAddr, PMV.AlignedAddrAlignment,
992 MemOpOrder, SSID, PerformPartwordOp, createCmpXchgInstFun, AI);
993 } else {
994 assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
995 OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
996 PMV.AlignedAddrAlignment, MemOpOrder,
997 PerformPartwordOp);
998 }
999
1000 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1001 AI->replaceAllUsesWith(FinalOldResult);
1002 AI->eraseFromParent();
1003}
1004
1005// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
1006AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
1007 ReplacementIRBuilder Builder(AI, *DL);
1008 AtomicRMWInst::BinOp Op = AI->getOperation();
1009
1010 assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
1011 Op == AtomicRMWInst::And) &&
1012 "Unable to widen operation");
1013
1014 PartwordMaskValues PMV =
1015 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1016 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1017
1018 Value *ValOperand_Shifted =
1019 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
1020 PMV.ShiftAmt, "ValOperand_Shifted");
1021
1022 Value *NewOperand;
1023
1024 if (Op == AtomicRMWInst::And)
1025 NewOperand =
1026 Builder.CreateOr(ValOperand_Shifted, PMV.Inv_Mask, "AndOperand");
1027 else
1028 NewOperand = ValOperand_Shifted;
1029
1030 AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
1031 Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
1032 AI->getOrdering(), AI->getSyncScopeID());
1033
1034 copyMetadataForAtomic(*NewAI, *AI);
1035
1036 Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
1037 AI->replaceAllUsesWith(FinalOldResult);
1038 AI->eraseFromParent();
1039 return NewAI;
1040}
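// Sketch of the widening above for a sub-word 'and' (32-bit minimum cmpxchg
// width, value sitting in bits [8, 16) as in the createMaskInstrs example):
//
//   %old = atomicrmw and ptr %addr, i8 %v monotonic
//
// becomes roughly
//
//   %v.ext  = zext i8 %v to i32
//   %v.shl  = shl i32 %v.ext, 8
//   %wide   = or i32 %v.shl, -65281          ; Inv_Mask = 0xFFFF00FF
//   %old.w  = atomicrmw and ptr %AlignedAddr, i32 %wide monotonic
//   %old.sh = lshr i32 %old.w, 8
//   %old    = trunc i32 %old.sh to i8
//
// OR'ing the operand with Inv_Mask makes the untouched bytes be and'ed with
// all-ones, so only the selected byte is affected.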
1041
1042bool AtomicExpandImpl::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
1043 // The basic idea here is that we're expanding a cmpxchg of a
1044 // smaller memory size up to a word-sized cmpxchg. To do this, we
1045 // need to add a retry-loop for strong cmpxchg, so that
1046 // modifications to other parts of the word don't cause a spurious
1047 // failure.
1048
1049 // This generates code like the following:
1050 // [[Setup mask values PMV.*]]
1051 // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
1052 // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
1053 // %InitLoaded = load i32* %addr
1054 // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
1055 // br partword.cmpxchg.loop
1056 // partword.cmpxchg.loop:
1057 // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
1058 // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
1059 // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
1060 // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
1061 // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
1062 // i32 %FullWord_NewVal success_ordering failure_ordering
1063 // %OldVal = extractvalue { i32, i1 } %NewCI, 0
1064 // %Success = extractvalue { i32, i1 } %NewCI, 1
1065 // br i1 %Success, label %partword.cmpxchg.end,
1066 // label %partword.cmpxchg.failure
1067 // partword.cmpxchg.failure:
1068 // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
1069 // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
1070 // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
1071 // label %partword.cmpxchg.end
1072 // partword.cmpxchg.end:
1073 // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
1074 // %FinalOldVal = trunc i32 %tmp1 to i8
1075 // %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
1076 // %Res = insertvalue { i8, i1 } %25, i1 %Success, 1
1077
1078 Value *Addr = CI->getPointerOperand();
1079 Value *Cmp = CI->getCompareOperand();
1080 Value *NewVal = CI->getNewValOperand();
1081
1082 BasicBlock *BB = CI->getParent();
1083 Function *F = BB->getParent();
1084 ReplacementIRBuilder Builder(CI, *DL);
1085 LLVMContext &Ctx = Builder.getContext();
1086
1087 BasicBlock *EndBB =
1088 BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
1089 auto FailureBB =
1090 BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
1091 auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
1092
1093 // The split call above "helpfully" added a branch at the end of BB
1094 // (to the wrong place).
1095 std::prev(BB->end())->eraseFromParent();
1096 Builder.SetInsertPoint(BB);
1097
1098 PartwordMaskValues PMV =
1099 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1100 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1101
1102 // Shift the incoming values over, into the right location in the word.
1103 Value *NewVal_Shifted =
1104 Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
1105 Value *Cmp_Shifted =
1106 Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
1107
1108 // Load the entire current word, and mask into place the expected and new
1109 // values
1110 LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
1111 InitLoaded->setVolatile(CI->isVolatile());
1112 Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
1113 Builder.CreateBr(LoopBB);
1114
1115 // partword.cmpxchg.loop:
1116 Builder.SetInsertPoint(LoopBB);
1117 PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
1118 Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
1119
1120 // Mask/Or the expected and new values into place in the loaded word.
1121 Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
1122 Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
1123 AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
1124 PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
1125 CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID());
1126 NewCI->setVolatile(CI->isVolatile());
1127 // When we're building a strong cmpxchg, we need a loop, so you
1128 // might think we could use a weak cmpxchg inside. But, using strong
1129 // allows the below comparison for ShouldContinue, and we're
1130 // expecting the underlying cmpxchg to be a machine instruction,
1131 // which is strong anyways.
1132 NewCI->setWeak(CI->isWeak());
1133
1134 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1135 Value *Success = Builder.CreateExtractValue(NewCI, 1);
1136
1137 if (CI->isWeak())
1138 Builder.CreateBr(EndBB);
1139 else
1140 Builder.CreateCondBr(Success, EndBB, FailureBB);
1141
1142 // partword.cmpxchg.failure:
1143 Builder.SetInsertPoint(FailureBB);
1144 // Upon failure, verify that the masked-out part of the loaded value
1145 // has been modified. If it didn't, abort the cmpxchg, since the
1146 // masked-in part must've.
1147 Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
1148 Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
1149 Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
1150
1151 // Add the second value to the phi from above
1152 Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
1153
1154 // partword.cmpxchg.end:
1155 Builder.SetInsertPoint(CI);
1156
1157 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1158 Value *Res = PoisonValue::get(CI->getType());
1159 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1160 Res = Builder.CreateInsertValue(Res, Success, 1);
1161
1162 CI->replaceAllUsesWith(Res);
1163 CI->eraseFromParent();
1164 return true;
1165}
1166
1167void AtomicExpandImpl::expandAtomicOpToLLSC(
1168 Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
1169 AtomicOrdering MemOpOrder,
1170 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1171 ReplacementIRBuilder Builder(I, *DL);
1172 Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
1173 MemOpOrder, PerformOp);
1174
1175 I->replaceAllUsesWith(Loaded);
1176 I->eraseFromParent();
1177}
1178
1179void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
1180 ReplacementIRBuilder Builder(AI, *DL);
1181
1182 PartwordMaskValues PMV =
1183 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1184 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1185
1186 // The value operand must be sign-extended for signed min/max so that the
1187 // target's signed comparison instructions can be used. Otherwise, just
1188 // zero-ext.
1189 Instruction::CastOps CastOp = Instruction::ZExt;
1190 AtomicRMWInst::BinOp RMWOp = AI->getOperation();
1191 if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
1192 CastOp = Instruction::SExt;
1193
1194 Value *ValOperand_Shifted = Builder.CreateShl(
1195 Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
1196 PMV.ShiftAmt, "ValOperand_Shifted");
1197 Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
1198 Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
1199 AI->getOrdering());
1200 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1201 AI->replaceAllUsesWith(FinalOldResult);
1202 AI->eraseFromParent();
1203}
1204
1205void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic(
1206 AtomicCmpXchgInst *CI) {
1207 ReplacementIRBuilder Builder(CI, *DL);
1208
1209 PartwordMaskValues PMV = createMaskInstrs(
1210 Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
1211 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1212
1213 Value *CmpVal_Shifted = Builder.CreateShl(
1214 Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
1215 "CmpVal_Shifted");
1216 Value *NewVal_Shifted = Builder.CreateShl(
1217 Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
1218 "NewVal_Shifted");
1219 Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
1220 Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
1221 CI->getMergedOrdering());
1222 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1223 Value *Res = PoisonValue::get(CI->getType());
1224 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1225 Value *Success = Builder.CreateICmpEQ(
1226 CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
1227 Res = Builder.CreateInsertValue(Res, Success, 1);
1228
1229 CI->replaceAllUsesWith(Res);
1230 CI->eraseFromParent();
1231}
1232
1233Value *AtomicExpandImpl::insertRMWLLSCLoop(
1234 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1235 AtomicOrdering MemOpOrder,
1236 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1237 LLVMContext &Ctx = Builder.getContext();
1238 BasicBlock *BB = Builder.GetInsertBlock();
1239 Function *F = BB->getParent();
1240
1241 assert(AddrAlign >=
1242 F->getDataLayout().getTypeStoreSize(ResultTy) &&
1243 "Expected at least natural alignment at this point.");
1244
1245 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1246 //
1247 // The standard expansion we produce is:
1248 // [...]
1249 // atomicrmw.start:
1250 // %loaded = @load.linked(%addr)
1251 // %new = some_op iN %loaded, %incr
1252 // %stored = @store_conditional(%new, %addr)
1253 // %try_again = icmp i32 ne %stored, 0
1254 // br i1 %try_again, label %loop, label %atomicrmw.end
1255 // atomicrmw.end:
1256 // [...]
1257 BasicBlock *ExitBB =
1258 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1259 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1260
1261 // The split call above "helpfully" added a branch at the end of BB (to the
1262 // wrong place).
1263 std::prev(BB->end())->eraseFromParent();
1264 Builder.SetInsertPoint(BB);
1265 Builder.CreateBr(LoopBB);
1266
1267 // Start the main loop block now that we've taken care of the preliminaries.
1268 Builder.SetInsertPoint(LoopBB);
1269 Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);
1270
1271 Value *NewVal = PerformOp(Builder, Loaded);
1272
1273 Value *StoreSuccess =
1274 TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
1275 Value *TryAgain = Builder.CreateICmpNE(
1276 StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
1277 Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
1278
1279 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1280 return Loaded;
1281}
1282
1283/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
1284/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
1285/// IR. As a migration step, we convert back to what used to be the standard
1286/// way to represent a pointer cmpxchg so that we can update backends one by
1287/// one.
1288 AtomicCmpXchgInst *
1289 AtomicExpandImpl::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1290 auto *M = CI->getModule();
1291 Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
1292 M->getDataLayout());
1293
1294 ReplacementIRBuilder Builder(CI, *DL);
1295
1296 Value *Addr = CI->getPointerOperand();
1297
1298 Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
1299 Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
1300
1301 auto *NewCI = Builder.CreateAtomicCmpXchg(
1302 Addr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
1303 CI->getFailureOrdering(), CI->getSyncScopeID());
1304 NewCI->setVolatile(CI->isVolatile());
1305 NewCI->setWeak(CI->isWeak());
1306 LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1307
1308 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1309 Value *Succ = Builder.CreateExtractValue(NewCI, 1);
1310
1311 OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
1312
1313 Value *Res = PoisonValue::get(CI->getType());
1314 Res = Builder.CreateInsertValue(Res, OldVal, 0);
1315 Res = Builder.CreateInsertValue(Res, Succ, 1);
1316
1317 CI->replaceAllUsesWith(Res);
1318 CI->eraseFromParent();
1319 return NewCI;
1320}
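// Sketch of the pointer-to-integer conversion above (assuming 64-bit
// pointers):
//
//   %res = cmpxchg ptr %addr, ptr %expected, ptr %new acq_rel monotonic
//
// becomes
//
//   %cmp.i = ptrtoint ptr %expected to i64
//   %new.i = ptrtoint ptr %new to i64
//   %pair  = cmpxchg ptr %addr, i64 %cmp.i, i64 %new.i acq_rel monotonic
//   %old.i = extractvalue { i64, i1 } %pair, 0
//   %succ  = extractvalue { i64, i1 } %pair, 1
//   %old   = inttoptr i64 %old.i to ptr
//
// and { %old, %succ } is re-packed to replace the original { ptr, i1 } result.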
1321
1322bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1323 AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
1324 AtomicOrdering FailureOrder = CI->getFailureOrdering();
1325 Value *Addr = CI->getPointerOperand();
1326 BasicBlock *BB = CI->getParent();
1327 Function *F = BB->getParent();
1328 LLVMContext &Ctx = F->getContext();
1329 // If shouldInsertFencesForAtomic() returns true, then the target does not
1330 // want to deal with memory orders, and emitLeading/TrailingFence should take
1331 // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
1332 // should preserve the ordering.
1333 bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
1334 AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
1335 ? AtomicOrdering::Monotonic
1336 : CI->getMergedOrdering();
1337
1338 // In implementations which use a barrier to achieve release semantics, we can
1339 // delay emitting this barrier until we know a store is actually going to be
1340 // attempted. The cost of this delay is that we need 2 copies of the block
1341 // emitting the load-linked, affecting code size.
1342 //
1343 // Ideally, this logic would be unconditional except for the minsize check
1344 // since in other cases the extra blocks naturally collapse down to the
1345 // minimal loop. Unfortunately, this puts too much stress on later
1346 // optimisations so we avoid emitting the extra logic in those cases too.
1347 bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
1348 SuccessOrder != AtomicOrdering::Monotonic &&
1349 SuccessOrder != AtomicOrdering::Acquire &&
1350 !F->hasMinSize();
1351
1352 // There's no overhead for sinking the release barrier in a weak cmpxchg, so
1353 // do it even on minsize.
1354 bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();
1355
1356 // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
1357 //
1358 // The full expansion we produce is:
1359 // [...]
1360 // %aligned.addr = ...
1361 // cmpxchg.start:
1362 // %unreleasedload = @load.linked(%aligned.addr)
1363 // %unreleasedload.extract = extract value from %unreleasedload
1364 // %should_store = icmp eq %unreleasedload.extract, %desired
1365 // br i1 %should_store, label %cmpxchg.releasingstore,
1366 // label %cmpxchg.nostore
1367 // cmpxchg.releasingstore:
1368 // fence?
1369 // br label cmpxchg.trystore
1370 // cmpxchg.trystore:
1371 // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
1372 // [%releasedload, %cmpxchg.releasedload]
1373 // %updated.new = insert %new into %loaded.trystore
1374 // %stored = @store_conditional(%updated.new, %aligned.addr)
1375 // %success = icmp eq i32 %stored, 0
1376 // br i1 %success, label %cmpxchg.success,
1377 // label %cmpxchg.releasedload/%cmpxchg.failure
1378 // cmpxchg.releasedload:
1379 // %releasedload = @load.linked(%aligned.addr)
1380 // %releasedload.extract = extract value from %releasedload
1381 // %should_store = icmp eq %releasedload.extract, %desired
1382 // br i1 %should_store, label %cmpxchg.trystore,
1383 // label %cmpxchg.failure
1384 // cmpxchg.success:
1385 // fence?
1386 // br label %cmpxchg.end
1387 // cmpxchg.nostore:
1388 // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
1389 // [%releasedload,
1390 // %cmpxchg.releasedload/%cmpxchg.trystore]
1391 // @load_linked_fail_balance()?
1392 // br label %cmpxchg.failure
1393 // cmpxchg.failure:
1394 // fence?
1395 // br label %cmpxchg.end
1396 // cmpxchg.end:
1397 // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
1398 // [%loaded.trystore, %cmpxchg.trystore]
1399 // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
1400 // %loaded = extract value from %loaded.exit
1401 // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
1402 // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
1403 // [...]
1404 BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
1405 auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
1406 auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
1407 auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
1408 auto ReleasedLoadBB =
1409 BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
1410 auto TryStoreBB =
1411 BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
1412 auto ReleasingStoreBB =
1413 BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
1414 auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
1415
1416 ReplacementIRBuilder Builder(CI, *DL);
1417
1418 // The split call above "helpfully" added a branch at the end of BB (to the
1419 // wrong place), but we might want a fence too. It's easiest to just remove
1420 // the branch entirely.
1421 std::prev(BB->end())->eraseFromParent();
1422 Builder.SetInsertPoint(BB);
1423 if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1424 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1425
1426 PartwordMaskValues PMV =
1427 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1428 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1429 Builder.CreateBr(StartBB);
1430
1431 // Start the main loop block now that we've taken care of the preliminaries.
1432 Builder.SetInsertPoint(StartBB);
1433 Value *UnreleasedLoad =
1434 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1435 Value *UnreleasedLoadExtract =
1436 extractMaskedValue(Builder, UnreleasedLoad, PMV);
1437 Value *ShouldStore = Builder.CreateICmpEQ(
1438 UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");
1439
1440 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1441 // jump straight past that fence instruction (if it exists).
1442 Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);
1443
1444 Builder.SetInsertPoint(ReleasingStoreBB);
1445 if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1446 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1447 Builder.CreateBr(TryStoreBB);
1448
1449 Builder.SetInsertPoint(TryStoreBB);
1450 PHINode *LoadedTryStore =
1451 Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
1452 LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
1453 Value *NewValueInsert =
1454 insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
1455 Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
1456 PMV.AlignedAddr, MemOpOrder);
1457 StoreSuccess = Builder.CreateICmpEQ(
1458 StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
1459 BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1460 Builder.CreateCondBr(StoreSuccess, SuccessBB,
1461 CI->isWeak() ? FailureBB : RetryBB);
1462
1463 Builder.SetInsertPoint(ReleasedLoadBB);
1464 Value *SecondLoad;
1465 if (HasReleasedLoadBB) {
1466 SecondLoad =
1467 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1468 Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
1469 ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
1470 CI->getCompareOperand(), "should_store");
1471
1472 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1473 // jump straight past that fence instruction (if it exists).
1474 Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
1475 // Update PHI node in TryStoreBB.
1476 LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
1477 } else
1478 Builder.CreateUnreachable();
1479
1480 // Make sure later instructions don't get reordered with a fence if
1481 // necessary.
1482 Builder.SetInsertPoint(SuccessBB);
1483 if (ShouldInsertFencesForAtomic ||
1484 TLI->shouldInsertTrailingFenceForAtomicStore(CI))
1485 TLI->emitTrailingFence(Builder, CI, SuccessOrder);
1486 Builder.CreateBr(ExitBB);
1487
1488 Builder.SetInsertPoint(NoStoreBB);
1489 PHINode *LoadedNoStore =
1490 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
1491 LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
1492 if (HasReleasedLoadBB)
1493 LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);
1494
1495 // In the failing case, where we don't execute the store-conditional, the
1496 // target might want to balance out the load-linked with a dedicated
1497 // instruction (e.g., on ARM, clearing the exclusive monitor).
1498 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
1499 Builder.CreateBr(FailureBB);
1500
1501 Builder.SetInsertPoint(FailureBB);
1502 PHINode *LoadedFailure =
1503 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
1504 LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
1505 if (CI->isWeak())
1506 LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
1507 if (ShouldInsertFencesForAtomic)
1508 TLI->emitTrailingFence(Builder, CI, FailureOrder);
1509 Builder.CreateBr(ExitBB);
1510
1511 // Finally, we have control-flow based knowledge of whether the cmpxchg
1512 // succeeded or not. We expose this to later passes by converting any
1513 // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
1514 // PHI.
1515 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1516 PHINode *LoadedExit =
1517 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
1518 LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
1519 LoadedExit->addIncoming(LoadedFailure, FailureBB);
1520 PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
1521 Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
1522 Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
1523
1524 // This is the "exit value" from the cmpxchg expansion. It may be of
1525 // a type wider than the one in the cmpxchg instruction.
1526 Value *LoadedFull = LoadedExit;
1527
1528 Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
1529 Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);
1530
1531 // Look for any users of the cmpxchg that are just comparing the loaded value
1532 // against the desired one, and replace them with the CFG-derived version.
1533 SmallVector<ExtractValueInst *, 2> PrunedInsts;
1534 for (auto *User : CI->users()) {
1535 ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
1536 if (!EV)
1537 continue;
1538
1539 assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
1540 "weird extraction from { iN, i1 }");
1541
1542 if (EV->getIndices()[0] == 0)
1543 EV->replaceAllUsesWith(Loaded);
1544 else
1545 EV->replaceAllUsesWith(Success);
1546
1547 PrunedInsts.push_back(EV);
1548 }
1549
1550 // We can remove the instructions now we're no longer iterating through them.
1551 for (auto *EV : PrunedInsts)
1552 EV->eraseFromParent();
1553
1554 if (!CI->use_empty()) {
1555 // Some use of the full struct return that we don't understand has happened,
1556 // so we've got to reconstruct it properly.
1557 Value *Res;
1558 Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0);
1559 Res = Builder.CreateInsertValue(Res, Success, 1);
1560
1561 CI->replaceAllUsesWith(Res);
1562 }
1563
1564 CI->eraseFromParent();
1565 return true;
1566}
1567
1568bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
1569 auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
1570 if (!C)
1571 return false;
1572
1573 AtomicRMWInst::BinOp Op = RMWI->getOperation();
1574 switch (Op) {
1575 case AtomicRMWInst::Add:
1576 case AtomicRMWInst::Sub:
1577 case AtomicRMWInst::Or:
1578 case AtomicRMWInst::Xor:
1579 return C->isZero();
1580 case AtomicRMWInst::And:
1581 return C->isMinusOne();
1582 // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
1583 default:
1584 return false;
1585 }
1586}
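// Illustrative note (editorial addition, not in the upstream file): the
// constant tests above mean RMWs such as the following are considered
// idempotent and become candidates for lowering to a fenced atomic load:
//   atomicrmw add ptr %p, i32 0  seq_cst   ; x + 0  == x
//   atomicrmw or  ptr %p, i32 0  seq_cst   ; x | 0  == x
//   atomicrmw and ptr %p, i32 -1 seq_cst   ; x & ~0 == x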
1587
1588bool AtomicExpandImpl::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
1589 if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1590 tryExpandAtomicLoad(ResultingLoad);
1591 return true;
1592 }
1593 return false;
1594}
1595
1596Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
1597 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1598 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
1599 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
1600 CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc) {
1601 LLVMContext &Ctx = Builder.getContext();
1602 BasicBlock *BB = Builder.GetInsertBlock();
1603 Function *F = BB->getParent();
1604
1605 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1606 //
1607 // The standard expansion we produce is:
1608 // [...]
1609 // %init_loaded = load atomic iN* %addr
1610 // br label %loop
1611 // loop:
1612 // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1613 // %new = some_op iN %loaded, %incr
1614 // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1615 // %new_loaded = extractvalue { iN, i1 } %pair, 0
1616 // %success = extractvalue { iN, i1 } %pair, 1
1617 // br i1 %success, label %atomicrmw.end, label %loop
1618 // atomicrmw.end:
1619 // [...]
1620 BasicBlock *ExitBB =
1621 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1622 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1623
1624 // The split call above "helpfully" added a branch at the end of BB (to the
1625 // wrong place), but we want a load. It's easiest to just remove
1626 // the branch entirely.
1627 std::prev(BB->end())->eraseFromParent();
1628 Builder.SetInsertPoint(BB);
1629 LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
1630 Builder.CreateBr(LoopBB);
1631
1632 // Start the main loop block now that we've taken care of the preliminaries.
1633 Builder.SetInsertPoint(LoopBB);
1634 PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
1635 Loaded->addIncoming(InitLoaded, BB);
1636
1637 Value *NewVal = PerformOp(Builder, Loaded);
1638
1639 Value *NewLoaded = nullptr;
1640 Value *Success = nullptr;
1641
1642 CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
1643 MemOpOrder == AtomicOrdering::Unordered
1644 ? AtomicOrdering::Monotonic
1645 : MemOpOrder,
1646 SSID, Success, NewLoaded, MetadataSrc);
1647 assert(Success && NewLoaded);
1648
1649 Loaded->addIncoming(NewLoaded, LoopBB);
1650
1651 Builder.CreateCondBr(Success, ExitBB, LoopBB);
1652
1653 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1654 return NewLoaded;
1655}
1656
1657bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1658 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1659 unsigned ValueSize = getAtomicOpSize(CI);
1660
1661 switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1662 default:
1663 llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
1664 case TargetLoweringBase::AtomicExpansionKind::None:
1665 if (ValueSize < MinCASSize)
1666 return expandPartwordCmpXchg(CI);
1667 return false;
1668 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
1669 return expandAtomicCmpXchg(CI);
1670 }
1671 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
1672 expandAtomicCmpXchgToMaskedIntrinsic(CI);
1673 return true;
1674 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
1675 return lowerAtomicCmpXchgInst(CI);
1676 case TargetLoweringBase::AtomicExpansionKind::Expand: {
1677 TLI->emitExpandAtomicCmpXchg(CI);
1678 return true;
1679 }
1680 }
1681}
1682
1683// Note: This function is exposed externally by AtomicExpandUtils.h
1684bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
1685 CreateCmpXchgInstFun CreateCmpXchg) {
1686 ReplacementIRBuilder Builder(AI, AI->getDataLayout());
1687 Builder.setIsFPConstrained(
1688 AI->getFunction()->hasFnAttribute(Attribute::StrictFP));
1689
1690 // FIXME: If FP exceptions are observable, we should force them off for the
1691 // loop for the FP atomics.
1692 Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
1693 Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
1694 AI->getOrdering(), AI->getSyncScopeID(),
1695 [&](IRBuilderBase &Builder, Value *Loaded) {
1696 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
1697 AI->getValOperand());
1698 },
1699 CreateCmpXchg, /*MetadataSrc=*/AI);
1700
1701 AI->replaceAllUsesWith(Loaded);
1702 AI->eraseFromParent();
1703 return true;
1704}
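// Editorial sketch (not in the upstream file): a minimal CreateCmpXchgInstFun
// callback that an external caller could pass to expandAtomicRMWToCmpXchg.
// The function name is hypothetical; the body mirrors the lambda used by
// expandAtomicRMWToLibcall further below.
//
//   static void emitCmpXchgForRMW(IRBuilderBase &Builder, Value *Addr,
//                                 Value *Loaded, Value *NewVal, Align Alignment,
//                                 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
//                                 Value *&Success, Value *&NewLoaded,
//                                 Instruction *MetadataSrc) {
//     // Emit a plain cmpxchg and unpack its {value, success} result pair.
//     AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
//         Addr, Loaded, NewVal, Alignment, MemOpOrder,
//         AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
//     Success = Builder.CreateExtractValue(Pair, 1, "success");
//     NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
//   }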
1705
1706// In order to use one of the sized library calls such as
1707// __atomic_fetch_add_4, the alignment must be sufficient, the size
1708// must be one of the potentially-specialized sizes, and the value
1709// type must actually exist in C on the target (otherwise, the
1710// function wouldn't actually be defined.)
1711static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
1712 const DataLayout &DL) {
1713 // TODO: "LargestSize" is an approximation for "largest type that
1714 // you can express in C". It seems to be the case that int128 is
1715 // supported on all 64-bit platforms, otherwise only up to 64-bit
1716 // integers are supported. If we get this wrong, then we'll try to
1717 // call a sized libcall that doesn't actually exist. There should
1718 // really be some more reliable way in LLVM of determining integer
1719 // sizes which are valid in the target's C ABI...
1720 unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1721 return Alignment >= Size &&
1722 (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1723 Size <= LargestSize;
1724}
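// Illustrative outcomes (editorial addition), assuming a typical 64-bit target
// where getLargestLegalIntTypeSizeInBits() >= 64 (so LargestSize == 16):
//   Size=4,  Align=4   -> true   (e.g. __atomic_fetch_add_4 is usable)
//   Size=8,  Align=4   -> false  (under-aligned; use the generic call)
//   Size=16, Align=16  -> true   (int128 assumed available)
//   Size=3,  Align=4   -> false  (not a specialized size)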
1725
1726void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) {
1727 static const RTLIB::Libcall Libcalls[6] = {
1728 RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1729 RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1730 unsigned Size = getAtomicOpSize(I);
1731
1732 bool expanded = expandAtomicOpToLibcall(
1733 I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
1734 I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1735 if (!expanded)
1736 report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load");
1737}
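// For illustration (editorial addition): when the 4-byte sized call is usable,
//   %v = load atomic i32, ptr %p seq_cst, align 4
// is replaced by roughly
//   %v = call i32 @__atomic_load_4(ptr %p, i32 5)
// where 5 is the C ABI encoding of seq_cst; otherwise the generic
// void __atomic_load(size_t, void *, void *, int) form is emitted instead.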
1738
1739void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
1740 static const RTLIB::Libcall Libcalls[6] = {
1741 RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1742 RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1743 unsigned Size = getAtomicOpSize(I);
1744
1745 bool expanded = expandAtomicOpToLibcall(
1746 I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
1747 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1748 if (!expanded)
1749 report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store");
1750}
1751
1752void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
1753 static const RTLIB::Libcall Libcalls[6] = {
1754 RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1755 RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1756 RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1757 unsigned Size = getAtomicOpSize(I);
1758
1759 bool expanded = expandAtomicOpToLibcall(
1760 I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
1761 I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
1762 Libcalls);
1763 if (!expanded)
1764 report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS");
1765}
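// For illustration (editorial addition): a word-sized
//   %r = cmpxchg ptr %p, i32 %old, i32 %new seq_cst seq_cst
// maps onto
//   bool __atomic_compare_exchange_4(i32 *ptr, i32 *expected, i32 desired,
//                                    int success_order, int failure_order)
// with %old spilled to a stack slot for 'expected'; the loaded value is read
// back from that slot afterwards to rebuild the { i32, i1 } result.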
1766
1767static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
1768 static const RTLIB::Libcall LibcallsXchg[6] = {
1769 RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1770 RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1771 RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1772 static const RTLIB::Libcall LibcallsAdd[6] = {
1773 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1774 RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1775 RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1776 static const RTLIB::Libcall LibcallsSub[6] = {
1777 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1778 RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1779 RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1780 static const RTLIB::Libcall LibcallsAnd[6] = {
1781 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1782 RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1783 RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1784 static const RTLIB::Libcall LibcallsOr[6] = {
1785 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1786 RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1787 RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1788 static const RTLIB::Libcall LibcallsXor[6] = {
1789 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1790 RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1791 RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1792 static const RTLIB::Libcall LibcallsNand[6] = {
1793 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1794 RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1795 RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1796
1797 switch (Op) {
1798 case AtomicRMWInst::BAD_BINOP:
1799 llvm_unreachable("Should not have BAD_BINOP.");
1800 case AtomicRMWInst::Xchg:
1801 return ArrayRef(LibcallsXchg);
1802 case AtomicRMWInst::Add:
1803 return ArrayRef(LibcallsAdd);
1804 case AtomicRMWInst::Sub:
1805 return ArrayRef(LibcallsSub);
1806 case AtomicRMWInst::And:
1807 return ArrayRef(LibcallsAnd);
1808 case AtomicRMWInst::Or:
1809 return ArrayRef(LibcallsOr);
1810 case AtomicRMWInst::Xor:
1811 return ArrayRef(LibcallsXor);
1812 case AtomicRMWInst::Nand:
1813 return ArrayRef(LibcallsNand);
1814 case AtomicRMWInst::Max:
1815 case AtomicRMWInst::Min:
1816 case AtomicRMWInst::UMax:
1817 case AtomicRMWInst::UMin:
1818 case AtomicRMWInst::FAdd:
1819 case AtomicRMWInst::FSub:
1820 case AtomicRMWInst::FMax:
1821 case AtomicRMWInst::FMin:
1822 case AtomicRMWInst::UIncWrap:
1823 case AtomicRMWInst::UDecWrap:
1824 case AtomicRMWInst::USubCond:
1825 case AtomicRMWInst::USubSat:
1826 // No atomic libcalls are available for these.
1827 return {};
1828 }
1829 llvm_unreachable("Unexpected AtomicRMW operation.");
1830}
1831
1832void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
1833 ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
1834
1835 unsigned Size = getAtomicOpSize(I);
1836
1837 bool Success = false;
1838 if (!Libcalls.empty())
1839 Success = expandAtomicOpToLibcall(
1840 I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
1841 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1842
1843 // The expansion failed: either there were no libcalls at all for
1844 // the operation (min/max), or there were only size-specialized
1845 // libcalls (add/sub/etc) and we needed a generic. So, expand to a
1846 // CAS libcall, via a CAS loop, instead.
1847 if (!Success) {
1848 expandAtomicRMWToCmpXchg(
1849 I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
1850 Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
1851 SyncScope::ID SSID, Value *&Success, Value *&NewLoaded,
1852 Instruction *MetadataSrc) {
1853 // Create the CAS instruction normally...
1854 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
1855 Addr, Loaded, NewVal, Alignment, MemOpOrder,
1856 AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
1857 if (MetadataSrc)
1858 copyMetadataForAtomic(*Pair, *MetadataSrc);
1859
1860 Success = Builder.CreateExtractValue(Pair, 1, "success");
1861 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
1862
1863 // ...and then expand the CAS into a libcall.
1864 expandAtomicCASToLibcall(Pair);
1865 });
1866 }
1867}
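// For illustration (editorial addition): there is no __atomic_fetch_min_N, so
// an 'atomicrmw min' handled here becomes, schematically,
//   loop:
//     %loaded = phi i32 [ %init, %entry ], [ %new_loaded, %loop ]
//     %cmp    = icmp sle i32 %loaded, %incr
//     %new    = select i1 %cmp, i32 %loaded, i32 %incr
//     ; cmpxchg, itself expanded into a __atomic_compare_exchange_4 call
//     br i1 %success, label %atomicrmw.end, label %loop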
1868
1869// A helper routine for the above expandAtomic*ToLibcall functions.
1870//
1871// 'Libcalls' contains an array of enum values for the particular
1872// ATOMIC libcalls to be emitted. All of the other arguments besides
1873// 'I' are extracted from the Instruction subclass by the
1874// caller. Depending on the particular call, some will be null.
1875bool AtomicExpandImpl::expandAtomicOpToLibcall(
1876 Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
1877 Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
1878 AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
1879 assert(Libcalls.size() == 6);
1880
1881 LLVMContext &Ctx = I->getContext();
1882 Module *M = I->getModule();
1883 const DataLayout &DL = M->getDataLayout();
1884 IRBuilder<> Builder(I);
1885 IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
1886
1887 bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
1888 Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
1889
1890 const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
1891
1892 // TODO: the "order" argument type is "int", not int32. So
1893 // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
1894 ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
1895 assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
1896 Constant *OrderingVal =
1897 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
1898 Constant *Ordering2Val = nullptr;
1899 if (CASExpected) {
1900 assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
1901 Ordering2Val =
1902 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
1903 }
1904 bool HasResult = I->getType() != Type::getVoidTy(Ctx);
1905
1906 RTLIB::Libcall RTLibType;
1907 if (UseSizedLibcall) {
1908 switch (Size) {
1909 case 1:
1910 RTLibType = Libcalls[1];
1911 break;
1912 case 2:
1913 RTLibType = Libcalls[2];
1914 break;
1915 case 4:
1916 RTLibType = Libcalls[3];
1917 break;
1918 case 8:
1919 RTLibType = Libcalls[4];
1920 break;
1921 case 16:
1922 RTLibType = Libcalls[5];
1923 break;
1924 }
1925 } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
1926 RTLibType = Libcalls[0];
1927 } else {
1928 // Can't use sized function, and there's no generic for this
1929 // operation, so give up.
1930 return false;
1931 }
1932
1933 if (!TLI->getLibcallName(RTLibType)) {
1934 // This target does not implement the requested atomic libcall so give up.
1935 return false;
1936 }
1937
1938 // Build up the function call. There are two kinds. First, the sized
1939 // variants. These calls are going to be one of the following (with
1940 // N=1,2,4,8,16):
1941 // iN __atomic_load_N(iN *ptr, int ordering)
1942 // void __atomic_store_N(iN *ptr, iN val, int ordering)
1943 // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
1944 // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
1945 // int success_order, int failure_order)
1946 //
1947 // Note that these functions can be used for non-integer atomic
1948 // operations, the values just need to be bitcast to integers on the
1949 // way in and out.
1950 //
1951 // And, then, the generic variants. They look like the following:
1952 // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
1953 // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
1954 // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
1955 // int ordering)
1956 // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
1957 // void *desired, int success_order,
1958 // int failure_order)
1959 //
1960 // The different signatures are built up depending on the
1961 // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
1962 // variables.
1963
1964 AllocaInst *AllocaCASExpected = nullptr;
1965 AllocaInst *AllocaValue = nullptr;
1966 AllocaInst *AllocaResult = nullptr;
1967
1968 Type *ResultTy;
1969 SmallVector<Value *, 6> Args;
1970 AttributeList Attr;
1971
1972 // 'size' argument.
1973 if (!UseSizedLibcall) {
1974 // Note, getIntPtrType is assumed equivalent to size_t.
1975 Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
1976 }
1977
1978 // 'ptr' argument.
1979 // note: This assumes all address spaces share a common libfunc
1980 // implementation and that addresses are convertible. For systems without
1981 // that property, we'd need to extend this mechanism to support AS-specific
1982 // families of atomic intrinsics.
1983 Value *PtrVal = PointerOperand;
1984 PtrVal = Builder.CreateAddrSpaceCast(PtrVal, PointerType::getUnqual(Ctx));
1985 Args.push_back(PtrVal);
1986
1987 // 'expected' argument, if present.
1988 if (CASExpected) {
1989 AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
1990 AllocaCASExpected->setAlignment(AllocaAlignment);
1991 Builder.CreateLifetimeStart(AllocaCASExpected, SizeVal64);
1992 Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
1993 Args.push_back(AllocaCASExpected);
1994 }
1995
1996 // 'val' argument ('desired' for cas), if present.
1997 if (ValueOperand) {
1998 if (UseSizedLibcall) {
1999 Value *IntValue =
2000 Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
2001 Args.push_back(IntValue);
2002 } else {
2003 AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
2004 AllocaValue->setAlignment(AllocaAlignment);
2005 Builder.CreateLifetimeStart(AllocaValue, SizeVal64);
2006 Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
2007 Args.push_back(AllocaValue);
2008 }
2009 }
2010
2011 // 'ret' argument.
2012 if (!CASExpected && HasResult && !UseSizedLibcall) {
2013 AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
2014 AllocaResult->setAlignment(AllocaAlignment);
2015 Builder.CreateLifetimeStart(AllocaResult, SizeVal64);
2016 Args.push_back(AllocaResult);
2017 }
2018
2019 // 'ordering' ('success_order' for cas) argument.
2020 Args.push_back(OrderingVal);
2021
2022 // 'failure_order' argument, if present.
2023 if (Ordering2Val)
2024 Args.push_back(Ordering2Val);
2025
2026 // Now, the return type.
2027 if (CASExpected) {
2028 ResultTy = Type::getInt1Ty(Ctx);
2029 Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
2030 } else if (HasResult && UseSizedLibcall)
2031 ResultTy = SizedIntTy;
2032 else
2033 ResultTy = Type::getVoidTy(Ctx);
2034
2035 // Done with setting up arguments and return types, create the call:
2036 SmallVector<Type *, 6> ArgTys;
2037 for (Value *Arg : Args)
2038 ArgTys.push_back(Arg->getType());
2039 FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
2040 FunctionCallee LibcallFn =
2041 M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
2042 CallInst *Call = Builder.CreateCall(LibcallFn, Args);
2043 Call->setAttributes(Attr);
2044 Value *Result = Call;
2045
2046 // And then, extract the results...
2047 if (ValueOperand && !UseSizedLibcall)
2048 Builder.CreateLifetimeEnd(AllocaValue, SizeVal64);
2049
2050 if (CASExpected) {
2051 // The final result from the CAS is {load of 'expected' alloca, bool result
2052 // from call}
2053 Type *FinalResultTy = I->getType();
2054 Value *V = PoisonValue::get(FinalResultTy);
2055 Value *ExpectedOut = Builder.CreateAlignedLoad(
2056 CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
2057 Builder.CreateLifetimeEnd(AllocaCASExpected, SizeVal64);
2058 V = Builder.CreateInsertValue(V, ExpectedOut, 0);
2059 V = Builder.CreateInsertValue(V, Result, 1);
2060 I->replaceAllUsesWith(V);
2061 } else if (HasResult) {
2062 Value *V;
2063 if (UseSizedLibcall)
2064 V = Builder.CreateBitOrPointerCast(Result, I->getType());
2065 else {
2066 V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
2067 AllocaAlignment);
2068 Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);
2069 }
2070 I->replaceAllUsesWith(V);
2071 }
2072 I->eraseFromParent();
2073 return true;
2074}
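// Worked example (editorial addition, names schematic): for an under-aligned
// 8-byte exchange where canUseSizedAtomicCall returns false,
//   %old = atomicrmw xchg ptr %p, i64 %v monotonic, align 4
// this routine spills %v into an alloca, allocates a result slot, and emits
// the generic form:
//   call void @__atomic_exchange(i64 8, ptr %p, ptr %val.addr, ptr %ret.addr,
//                                i32 0)
// (0 being the C ABI encoding of a relaxed/monotonic ordering); the result is
// then reloaded from %ret.addr and replaces all uses of %old.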