AtomicExpandPass.cpp
//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass (at IR level) to replace atomic instructions with
// __atomic_* library calls, or target-specific instructions which implement
// the same semantics in a way which better fits the target backend. This can
// include the use of (intrinsic-based) load-linked/store-conditional loops,
// AtomicCmpXchg, or type coercions.
//
//===----------------------------------------------------------------------===//

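// For illustration (not part of the pass logic): on a target whose widest
// supported atomic operation is 64 bits, a 16-byte atomic load such as
//   %v = load atomic i128, ptr %p seq_cst, align 16
// cannot be lowered natively and is rewritten below into a call to the
// generic __atomic_load library function, while operations the target can
// handle only partially may instead be expanded into compare-exchange or
// load-linked/store-conditional loops.
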
17#include "llvm/ADT/ArrayRef.h"
28#include "llvm/IR/Attributes.h"
29#include "llvm/IR/BasicBlock.h"
30#include "llvm/IR/Constant.h"
31#include "llvm/IR/Constants.h"
32#include "llvm/IR/DataLayout.h"
34#include "llvm/IR/Function.h"
35#include "llvm/IR/IRBuilder.h"
37#include "llvm/IR/Instruction.h"
39#include "llvm/IR/Module.h"
40#include "llvm/IR/Type.h"
41#include "llvm/IR/User.h"
42#include "llvm/IR/Value.h"
44#include "llvm/Pass.h"
47#include "llvm/Support/Debug.h"
52#include <cassert>
53#include <cstdint>
54#include <iterator>
55
using namespace llvm;

#define DEBUG_TYPE "atomic-expand"

namespace {

class AtomicExpand : public FunctionPass {
  const TargetLowering *TLI = nullptr;
  const DataLayout *DL = nullptr;

public:
  static char ID; // Pass identification, replacement for typeid

  AtomicExpand() : FunctionPass(ID) {
    initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override;

private:
  bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
  IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
  LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
  bool tryExpandAtomicLoad(LoadInst *LI);
  bool expandAtomicLoadToLL(LoadInst *LI);
  bool expandAtomicLoadToCmpXchg(LoadInst *LI);
  StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
  bool tryExpandAtomicStore(StoreInst *SI);
  void expandAtomicStore(StoreInst *SI);
  bool tryExpandAtomicRMW(AtomicRMWInst *AI);
  AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
  Value *
  insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
                    Align AddrAlign, AtomicOrdering MemOpOrder,
                    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
  void expandAtomicOpToLLSC(
      Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
      AtomicOrdering MemOpOrder,
      function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
  void expandPartwordAtomicRMW(
      AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind);
  AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
  bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
  void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
  void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);

  AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
  static Value *insertRMWCmpXchgLoop(
      IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
      AtomicOrdering MemOpOrder, SyncScope::ID SSID,
      function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
      CreateCmpXchgInstFun CreateCmpXchg);
  bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);

  bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
  bool isIdempotentRMW(AtomicRMWInst *RMWI);
  bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);

  bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
                               Value *PointerOperand, Value *ValueOperand,
                               Value *CASExpected, AtomicOrdering Ordering,
                               AtomicOrdering Ordering2,
                               ArrayRef<RTLIB::Libcall> Libcalls);
  void expandAtomicLoadToLibcall(LoadInst *LI);
  void expandAtomicStoreToLibcall(StoreInst *LI);
  void expandAtomicRMWToLibcall(AtomicRMWInst *I);
  void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);

  friend bool
  llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
                                 CreateCmpXchgInstFun CreateCmpXchg);
};

// IRBuilder to be used for replacement atomic instructions.
struct ReplacementIRBuilder : IRBuilder<InstSimplifyFolder> {
  // Preserves the DebugLoc from I, and preserves still valid metadata.
  explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
      : IRBuilder(I->getContext(), DL) {
    SetInsertPoint(I);
    this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
  }
};

} // end anonymous namespace

char AtomicExpand::ID = 0;

char &llvm::AtomicExpandID = AtomicExpand::ID;

INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions", false,
                false)

FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); }

// Helper functions to retrieve the size of atomic instructions.
static unsigned getAtomicOpSize(LoadInst *LI) {
  const DataLayout &DL = LI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(LI->getType());
}

static unsigned getAtomicOpSize(StoreInst *SI) {
  const DataLayout &DL = SI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(SI->getValueOperand()->getType());
}

static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
  const DataLayout &DL = RMWI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
}

static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
  const DataLayout &DL = CASI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
}

// Determine if a particular atomic operation has a supported size,
// and is of appropriate alignment, to be passed through for target
// lowering. (Versus turning into a __atomic libcall)
template <typename Inst>
static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
  unsigned Size = getAtomicOpSize(I);
  Align Alignment = I->getAlign();
  return Alignment >= Size &&
         Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
}
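
// For example (illustrative): with getMaxAtomicSizeInBitsSupported() == 64,
// an i128 cmpxchg fails the size check, and an i32 load with "align 2" fails
// the alignment check; both are routed to the __atomic_* libcall path below.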

bool AtomicExpand::runOnFunction(Function &F) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  auto &TM = TPC->getTM<TargetMachine>();
  const auto *Subtarget = TM.getSubtargetImpl(F);
  if (!Subtarget->enableAtomicExpand())
    return false;
  TLI = Subtarget->getTargetLowering();
  DL = &F.getParent()->getDataLayout();

  SmallVector<Instruction *, 1> AtomicInsts;

  // Changing control-flow while iterating through it is a bad idea, so gather a
  // list of all atomic instructions before we start.
  for (Instruction &I : instructions(F))
    if (I.isAtomic() && !isa<FenceInst>(&I))
      AtomicInsts.push_back(&I);

  bool MadeChange = false;
  for (auto *I : AtomicInsts) {
    auto LI = dyn_cast<LoadInst>(I);
    auto SI = dyn_cast<StoreInst>(I);
    auto RMWI = dyn_cast<AtomicRMWInst>(I);
    auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
    assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");

    // If the Size/Alignment is not supported, replace with a libcall.
    if (LI) {
      if (!atomicSizeSupported(TLI, LI)) {
        expandAtomicLoadToLibcall(LI);
        MadeChange = true;
        continue;
      }
    } else if (SI) {
      if (!atomicSizeSupported(TLI, SI)) {
        expandAtomicStoreToLibcall(SI);
        MadeChange = true;
        continue;
      }
    } else if (RMWI) {
      if (!atomicSizeSupported(TLI, RMWI)) {
        expandAtomicRMWToLibcall(RMWI);
        MadeChange = true;
        continue;
      }
    } else if (CASI) {
      if (!atomicSizeSupported(TLI, CASI)) {
        expandAtomicCASToLibcall(CASI);
        MadeChange = true;
        continue;
      }
    }

    if (LI && TLI->shouldCastAtomicLoadInIR(LI) ==
                  TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = LI = convertAtomicLoadToIntegerType(LI);
      MadeChange = true;
    } else if (SI &&
               TLI->shouldCastAtomicStoreInIR(SI) ==
                   TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = SI = convertAtomicStoreToIntegerType(SI);
      MadeChange = true;
    } else if (RMWI &&
               TLI->shouldCastAtomicRMWIInIR(RMWI) ==
                   TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = RMWI = convertAtomicXchgToIntegerType(RMWI);
      MadeChange = true;
    } else if (CASI) {
      // TODO: when we're ready to make the change at the IR level, we can
      // extend convertCmpXchgToInteger for floating point too.
      if (CASI->getCompareOperand()->getType()->isPointerTy()) {
        // TODO: add a TLI hook to control this so that each target can
        // convert to lowering the original type one at a time.
        I = CASI = convertCmpXchgToIntegerType(CASI);
        MadeChange = true;
      }
    }

    if (TLI->shouldInsertFencesForAtomic(I)) {
      auto FenceOrdering = AtomicOrdering::Monotonic;
      if (LI && isAcquireOrStronger(LI->getOrdering())) {
        FenceOrdering = LI->getOrdering();
        LI->setOrdering(AtomicOrdering::Monotonic);
      } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
        FenceOrdering = SI->getOrdering();
        SI->setOrdering(AtomicOrdering::Monotonic);
      } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
                          isAcquireOrStronger(RMWI->getOrdering()))) {
        FenceOrdering = RMWI->getOrdering();
        RMWI->setOrdering(AtomicOrdering::Monotonic);
      } else if (CASI &&
                 TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
                     TargetLoweringBase::AtomicExpansionKind::None &&
                 (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
                  isAcquireOrStronger(CASI->getSuccessOrdering()) ||
                  isAcquireOrStronger(CASI->getFailureOrdering()))) {
        // If a compare and swap is lowered to LL/SC, we can do smarter fence
        // insertion, with a stronger one on the success path than on the
        // failure path. As a result, fence insertion is directly done by
        // expandAtomicCmpXchg in that case.
        FenceOrdering = CASI->getMergedOrdering();
        CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
        CASI->setFailureOrdering(AtomicOrdering::Monotonic);
      }

      if (FenceOrdering != AtomicOrdering::Monotonic) {
        MadeChange |= bracketInstWithFences(I, FenceOrdering);
      }
    } else if (I->hasAtomicStore() &&
               TLI->shouldInsertTrailingFenceForAtomicStore(I)) {
      auto FenceOrdering = AtomicOrdering::Monotonic;
      if (SI)
        FenceOrdering = SI->getOrdering();
      else if (RMWI)
        FenceOrdering = RMWI->getOrdering();
      else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) !=
                           TargetLoweringBase::AtomicExpansionKind::LLSC)
        // LLSC is handled in expandAtomicCmpXchg().
        FenceOrdering = CASI->getSuccessOrdering();

      IRBuilder Builder(I);
      if (auto TrailingFence =
              TLI->emitTrailingFence(Builder, I, FenceOrdering)) {
        TrailingFence->moveAfter(I);
        MadeChange = true;
      }
    }

    if (LI)
      MadeChange |= tryExpandAtomicLoad(LI);
    else if (SI)
      MadeChange |= tryExpandAtomicStore(SI);
    else if (RMWI) {
      // There are two different ways of expanding RMW instructions:
      //  - into a load if it is idempotent
      //  - into a Cmpxchg/LL-SC loop otherwise
      //  we try them in that order.

      if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
        MadeChange = true;
      } else {
        AtomicRMWInst::BinOp Op = RMWI->getOperation();
        unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
        unsigned ValueSize = getAtomicOpSize(RMWI);
        if (ValueSize < MinCASSize &&
            (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
             Op == AtomicRMWInst::And)) {
          RMWI = widenPartwordAtomicRMW(RMWI);
          MadeChange = true;
        }

        MadeChange |= tryExpandAtomicRMW(RMWI);
      }
    } else if (CASI)
      MadeChange |= tryExpandAtomicCmpXchg(CASI);
  }
  return MadeChange;
}

bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) {
  ReplacementIRBuilder Builder(I, *DL);

  auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);

  auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
  // We have a guard here because not every atomic operation generates a
  // trailing fence.
  if (TrailingFence)
    TrailingFence->moveAfter(I);

  return (LeadingFence || TrailingFence);
}

/// Get the iX type with the same bitwidth as T.
IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
                                                       const DataLayout &DL) {
  EVT VT = TLI->getMemValueType(DL, T);
  unsigned BitWidth = VT.getStoreSizeInBits();
  assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
  return IntegerType::get(T->getContext(), BitWidth);
}

/// Convert an atomic load of a non-integral type to an integer load of the
/// equivalent bitwidth. See the function comment on
/// convertAtomicStoreToIntegerType for background.
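///
/// For example (illustrative only), on a target that requests this cast,
///   %v = load atomic float, ptr %p acquire, align 4
/// becomes
///   %v.i = load atomic i32, ptr %p acquire, align 4
///   %v   = bitcast i32 %v.i to float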
LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
  auto *M = LI->getModule();
  Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());

  ReplacementIRBuilder Builder(LI, *DL);

  Value *Addr = LI->getPointerOperand();

  auto *NewLI = Builder.CreateLoad(NewTy, Addr);
  NewLI->setAlignment(LI->getAlign());
  NewLI->setVolatile(LI->isVolatile());
  NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
  LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");

  Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
  LI->replaceAllUsesWith(NewVal);
  LI->eraseFromParent();
  return NewLI;
}

AtomicRMWInst *
AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
  auto *M = RMWI->getModule();
  Type *NewTy =
      getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());

  ReplacementIRBuilder Builder(RMWI, *DL);

  Value *Addr = RMWI->getPointerOperand();
  Value *Val = RMWI->getValOperand();
  Value *NewVal = Val->getType()->isPointerTy()
                      ? Builder.CreatePtrToInt(Val, NewTy)
                      : Builder.CreateBitCast(Val, NewTy);

  auto *NewRMWI =
      Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Addr, NewVal,
                              RMWI->getAlign(), RMWI->getOrdering());
  NewRMWI->setVolatile(RMWI->isVolatile());
  LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");

  Value *NewRVal = RMWI->getType()->isPointerTy()
                       ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
                       : Builder.CreateBitCast(NewRMWI, RMWI->getType());
  RMWI->replaceAllUsesWith(NewRVal);
  RMWI->eraseFromParent();
  return NewRMWI;
}

bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
  switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC:
    expandAtomicOpToLLSC(
        LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
        LI->getOrdering(),
        [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
    return true;
  case TargetLoweringBase::AtomicExpansionKind::LLOnly:
    return expandAtomicLoadToLL(LI);
  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
    return expandAtomicLoadToCmpXchg(LI);
  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
    LI->setAtomic(AtomicOrdering::NotAtomic);
    return true;
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
  }
}

bool AtomicExpand::tryExpandAtomicStore(StoreInst *SI) {
  switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::Expand:
    expandAtomicStore(SI);
    return true;
  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
    SI->setAtomic(AtomicOrdering::NotAtomic);
    return true;
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicStore");
  }
}

bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
  ReplacementIRBuilder Builder(LI, *DL);

  // On some architectures, load-linked instructions are atomic for larger
  // sizes than normal loads. For example, the only 64-bit load guaranteed
  // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
  Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
                                   LI->getPointerOperand(), LI->getOrdering());
  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);

  LI->replaceAllUsesWith(Val);
  LI->eraseFromParent();

  return true;
}

bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
  ReplacementIRBuilder Builder(LI, *DL);
  AtomicOrdering Order = LI->getOrdering();
  if (Order == AtomicOrdering::Unordered)
    Order = AtomicOrdering::Monotonic;

  Value *Addr = LI->getPointerOperand();
  Type *Ty = LI->getType();
  Constant *DummyVal = Constant::getNullValue(Ty);

  Value *Pair = Builder.CreateAtomicCmpXchg(
      Addr, DummyVal, DummyVal, LI->getAlign(), Order,
      AtomicOrdering::Monotonic);
  Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");

  LI->replaceAllUsesWith(Loaded);
  LI->eraseFromParent();

  return true;
}

/// Convert an atomic store of a non-integral type to an integer store of the
/// equivalent bitwidth. We used to not support floating point or vector
/// atomics in the IR at all. The backends learned to deal with the bitcast
/// idiom because that was the only way of expressing the notion of an atomic
/// float or vector store. The long term plan is to teach each backend to
/// instruction select from the original atomic store, but as a migration
/// mechanism, we convert back to the old format which the backends understand.
/// Each backend will need individual work to recognize the new format.
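///
/// For example (illustrative),
///   store atomic float %f, ptr %p release, align 4
/// becomes
///   %f.i = bitcast float %f to i32
///   store atomic i32 %f.i, ptr %p release, align 4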
StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
  ReplacementIRBuilder Builder(SI, *DL);
  auto *M = SI->getModule();
  Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
                                            M->getDataLayout());
  Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);

  Value *Addr = SI->getPointerOperand();

  StoreInst *NewSI = Builder.CreateStore(NewVal, Addr);
  NewSI->setAlignment(SI->getAlign());
  NewSI->setVolatile(SI->isVolatile());
  NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
  LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
  SI->eraseFromParent();
  return NewSI;
}

void AtomicExpand::expandAtomicStore(StoreInst *SI) {
  // This function is only called on atomic stores that are too large to be
  // atomic if implemented as a native store. So we replace them by an
  // atomic swap, that can be implemented for example as a ldrex/strex on ARM
  // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
  // It is the responsibility of the target to only signal expansion via
  // shouldExpandAtomicRMW in cases where this is required and possible.
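  //
  // For example (illustrative), on 32-bit ARM
  //   store atomic i64 %v, ptr %p seq_cst, align 8
  // becomes "atomicrmw xchg ptr %p, i64 %v seq_cst" (with an unused result),
  // which the LL/SC expansion can then turn into an ldrexd/strexd loop.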
  ReplacementIRBuilder Builder(SI, *DL);
  AtomicOrdering Ordering = SI->getOrdering();
  assert(Ordering != AtomicOrdering::NotAtomic);
  AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
                                   ? AtomicOrdering::Monotonic
                                   : Ordering;
  AtomicRMWInst *AI = Builder.CreateAtomicRMW(
      AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
      SI->getAlign(), RMWOrdering);
  SI->eraseFromParent();

  // Now we have an appropriate swap instruction, lower it as usual.
  tryExpandAtomicRMW(AI);
}

static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
                                 Value *Loaded, Value *NewVal, Align AddrAlign,
                                 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
                                 Value *&Success, Value *&NewLoaded) {
  Type *OrigTy = NewVal->getType();

  // This code can go away when cmpxchg supports FP types.
  assert(!OrigTy->isPointerTy());
  bool NeedBitcast = OrigTy->isFloatingPointTy();
  if (NeedBitcast) {
    IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
    NewVal = Builder.CreateBitCast(NewVal, IntTy);
    Loaded = Builder.CreateBitCast(Loaded, IntTy);
  }

  Value *Pair = Builder.CreateAtomicCmpXchg(
      Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
      AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
  Success = Builder.CreateExtractValue(Pair, 1, "success");
  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");

  if (NeedBitcast)
    NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
}

bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
  LLVMContext &Ctx = AI->getModule()->getContext();
  TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
  switch (Kind) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
    unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
    unsigned ValueSize = getAtomicOpSize(AI);
    if (ValueSize < MinCASSize) {
      expandPartwordAtomicRMW(AI,
                              TargetLoweringBase::AtomicExpansionKind::LLSC);
    } else {
      auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
        return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
                                   AI->getValOperand());
      };
      expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
                           AI->getAlign(), AI->getOrdering(), PerformOp);
    }
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
    unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
    unsigned ValueSize = getAtomicOpSize(AI);
    if (ValueSize < MinCASSize) {
      expandPartwordAtomicRMW(AI,
                              TargetLoweringBase::AtomicExpansionKind::CmpXChg);
    } else {
      SmallVector<StringRef> SSNs;
      Ctx.getSyncScopeNames(SSNs);
      auto MemScope = SSNs[AI->getSyncScopeID()].empty()
                          ? "system"
                          : SSNs[AI->getSyncScopeID()];
      OptimizationRemarkEmitter ORE(AI->getFunction());
      ORE.emit([&]() {
        return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
               << "A compare and swap loop was generated for an atomic "
               << AI->getOperationName(AI->getOperation()) << " operation at "
               << MemScope << " memory scope";
      });
      expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
    }
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
    expandAtomicRMWToMaskedIntrinsic(AI);
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
    TLI->emitBitTestAtomicRMWIntrinsic(AI);
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: {
    TLI->emitCmpArithAtomicRMWIntrinsic(AI);
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
    return lowerAtomicRMWInst(AI);
  case TargetLoweringBase::AtomicExpansionKind::Expand:
    TLI->emitExpandAtomicRMW(AI);
    return true;
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
  }
}

namespace {

struct PartwordMaskValues {
  // These three fields are guaranteed to be set by createMaskInstrs.
  Type *WordType = nullptr;
  Type *ValueType = nullptr;
  Type *IntValueType = nullptr;
  Value *AlignedAddr = nullptr;
  Align AlignedAddrAlignment;
  // The remaining fields can be null.
  Value *ShiftAmt = nullptr;
  Value *Mask = nullptr;
  Value *Inv_Mask = nullptr;
};

LLVM_ATTRIBUTE_UNUSED
raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
  auto PrintObj = [&O](auto *V) {
    if (V)
      O << *V;
    else
      O << "nullptr";
    O << '\n';
  };
  O << "PartwordMaskValues {\n";
  O << "  WordType: ";
  PrintObj(PMV.WordType);
  O << "  ValueType: ";
  PrintObj(PMV.ValueType);
  O << "  AlignedAddr: ";
  PrintObj(PMV.AlignedAddr);
  O << "  AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
  O << "  ShiftAmt: ";
  PrintObj(PMV.ShiftAmt);
  O << "  Mask: ";
  PrintObj(PMV.Mask);
  O << "  Inv_Mask: ";
  PrintObj(PMV.Inv_Mask);
  O << "}\n";
  return O;
}

} // end anonymous namespace

/// This is a helper function which builds instructions to provide
/// values necessary for partword atomic operations. It takes an
/// incoming address, Addr, and ValueType, and constructs the address,
/// shift-amounts and masks needed to work with a larger value of size
/// WordSize.
///
/// AlignedAddr: Addr rounded down to a multiple of WordSize
///
/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
///           from AlignAddr for it to have the same value as if
///           ValueType was loaded from Addr.
///
/// Mask: Value to mask with the value loaded from AlignAddr to
///       include only the part that would've been loaded from Addr.
///
/// Inv_Mask: The inverse of Mask.
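///
/// Worked example (illustrative, little-endian, MinWordSize == 4): for an i8
/// value at address 0x1003, AlignedAddr is 0x1000, ShiftAmt is 24, Mask is
/// 0xff000000 and Inv_Mask is 0x00ffffff.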
static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
                                           Instruction *I, Type *ValueType,
                                           Value *Addr, Align AddrAlign,
                                           unsigned MinWordSize) {
  PartwordMaskValues PMV;

  Module *M = I->getModule();
  LLVMContext &Ctx = M->getContext();
  const DataLayout &DL = M->getDataLayout();
  unsigned ValueSize = DL.getTypeStoreSize(ValueType);

  PMV.ValueType = PMV.IntValueType = ValueType;
  if (PMV.ValueType->isFloatingPointTy())
    PMV.IntValueType =
        Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());

  PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
                                         : ValueType;
  if (PMV.ValueType == PMV.WordType) {
    PMV.AlignedAddr = Addr;
    PMV.AlignedAddrAlignment = AddrAlign;
    PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
    PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
    return PMV;
  }

  PMV.AlignedAddrAlignment = Align(MinWordSize);

  assert(ValueSize < MinWordSize);

  PointerType *PtrTy = cast<PointerType>(Addr->getType());
  IntegerType *IntTy = DL.getIntPtrType(Ctx, PtrTy->getAddressSpace());
  Value *PtrLSB;

  if (AddrAlign < MinWordSize) {
    PMV.AlignedAddr = Builder.CreateIntrinsic(
        Intrinsic::ptrmask, {PtrTy, IntTy},
        {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
        "AlignedAddr");

    Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
    PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
  } else {
    // If the alignment is high enough, the LSB are known 0.
    PMV.AlignedAddr = Addr;
    PtrLSB = ConstantInt::getNullValue(IntTy);
  }

  if (DL.isLittleEndian()) {
    // turn bytes into bits
    PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
  } else {
    // turn bytes into bits, and count from the other side.
    PMV.ShiftAmt = Builder.CreateShl(
        Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
  }

  PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
  PMV.Mask = Builder.CreateShl(
      ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
      "Mask");

  PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");

  return PMV;
}

static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
                                 const PartwordMaskValues &PMV) {
  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
  if (PMV.WordType == PMV.ValueType)
    return WideWord;

  Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
  Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
  return Builder.CreateBitCast(Trunc, PMV.ValueType);
}

static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
                                Value *Updated, const PartwordMaskValues &PMV) {
  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
  assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
  if (PMV.WordType == PMV.ValueType)
    return Updated;

  Updated = Builder.CreateBitCast(Updated, PMV.IntValueType);

  Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
  Value *Shift =
      Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
  Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
  Value *Or = Builder.CreateOr(And, Shift, "inserted");
  return Or;
}

/// Emit IR to implement a masked version of a given atomicrmw
/// operation. (That is, only the bits under the Mask should be
/// affected by the operation)
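///
/// For example (illustrative), a masked 8-bit Nand inside a 32-bit word is
/// computed on the full shifted word and then re-merged:
///   %new   = <nand of %Loaded and %Shifted_Inc>
///   %final = (%Loaded & Inv_Mask) | (%new & Mask)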
static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
                                    IRBuilderBase &Builder, Value *Loaded,
                                    Value *Shifted_Inc, Value *Inc,
                                    const PartwordMaskValues &PMV) {
  // TODO: update to use
  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
  // to merge bits from two values without requiring PMV.Inv_Mask.
  switch (Op) {
  case AtomicRMWInst::Xchg: {
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
    return FinalVal;
  }
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
  case AtomicRMWInst::And:
    llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::Nand: {
    // The other arithmetic ops need to be masked into place.
    Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
    Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
    return FinalVal;
  }
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
  case AtomicRMWInst::UMax:
  case AtomicRMWInst::UMin:
  case AtomicRMWInst::FAdd:
  case AtomicRMWInst::FSub:
  case AtomicRMWInst::FMin:
  case AtomicRMWInst::FMax:
  case AtomicRMWInst::UIncWrap:
  case AtomicRMWInst::UDecWrap: {
    // Finally, other ops will operate on the full value, so truncate down to
    // the original size, and expand out again after doing the
    // operation. Bitcasts will be inserted for FP values.
    Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
    Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
    Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
    return FinalVal;
  }
  default:
    llvm_unreachable("Unknown atomic op");
  }
}

/// Expand a sub-word atomicrmw operation into an appropriate
/// word-sized operation.
///
/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
/// way as a typical atomicrmw expansion. The only difference here is
/// that the operation inside of the loop may operate upon only a
/// part of the value.
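///
/// For example (illustrative), an "atomicrmw add i8" on a target whose
/// narrowest cmpxchg is 32 bits becomes a loop that loads the containing
/// aligned i32 word, performs the 8-bit add within the masked lane, and
/// attempts to store the whole word back, retrying on contention.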
void AtomicExpand::expandPartwordAtomicRMW(
    AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
  AtomicOrdering MemOpOrder = AI->getOrdering();
  SyncScope::ID SSID = AI->getSyncScopeID();

  ReplacementIRBuilder Builder(AI, *DL);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  Value *ValOperand_Shifted = nullptr;
  if (AI->getOperation() == AtomicRMWInst::Xchg ||
      AI->getOperation() == AtomicRMWInst::Add ||
      AI->getOperation() == AtomicRMWInst::Sub ||
      AI->getOperation() == AtomicRMWInst::Nand) {
    ValOperand_Shifted =
        Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
                          PMV.ShiftAmt, "ValOperand_Shifted");
  }

  auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
    return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded,
                                 ValOperand_Shifted, AI->getValOperand(), PMV);
  };

  Value *OldResult;
  if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
    OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
                                     PMV.AlignedAddrAlignment, MemOpOrder, SSID,
                                     PerformPartwordOp, createCmpXchgInstFun);
  } else {
    assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
    OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
                                  PMV.AlignedAddrAlignment, MemOpOrder,
                                  PerformPartwordOp);
  }

  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
}

// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
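//
// For example (illustrative, 32-bit minimum width): "atomicrmw or i8" can
// simply become an i32 "or" on the containing aligned word with the operand
// shifted into place (the other bytes are or'ed with zero), while "and" must
// first or the shifted operand with Inv_Mask so that the untouched bytes are
// and'ed with all-ones and therefore preserved.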
AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
  ReplacementIRBuilder Builder(AI, *DL);
  AtomicRMWInst::BinOp Op = AI->getOperation();

  assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
          Op == AtomicRMWInst::And) &&
         "Unable to widen operation");

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  Value *ValOperand_Shifted =
      Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
                        PMV.ShiftAmt, "ValOperand_Shifted");

  Value *NewOperand;

  if (Op == AtomicRMWInst::And)
    NewOperand =
        Builder.CreateOr(PMV.Inv_Mask, ValOperand_Shifted, "AndOperand");
  else
    NewOperand = ValOperand_Shifted;

  AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
      Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
      AI->getOrdering(), AI->getSyncScopeID());
  // TODO: Preserve metadata

  Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
  return NewAI;
}

bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
  // The basic idea here is that we're expanding a cmpxchg of a
  // smaller memory size up to a word-sized cmpxchg. To do this, we
  // need to add a retry-loop for strong cmpxchg, so that
  // modifications to other parts of the word don't cause a spurious
  // failure.

  // This generates code like the following:
  //     [[Setup mask values PMV.*]]
  //     %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
  //     %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
  //     %InitLoaded = load i32* %addr
  //     %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
  //     br partword.cmpxchg.loop
  // partword.cmpxchg.loop:
  //     %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
  //                               [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
  //     %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
  //     %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
  //     %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
  //              i32 %FullWord_NewVal success_ordering failure_ordering
  //     %OldVal = extractvalue { i32, i1 } %NewCI, 0
  //     %Success = extractvalue { i32, i1 } %NewCI, 1
  //     br i1 %Success, label %partword.cmpxchg.end,
  //                     label %partword.cmpxchg.failure
  // partword.cmpxchg.failure:
  //     %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
  //     %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
  //     br i1 %ShouldContinue, label %partword.cmpxchg.loop,
  //                            label %partword.cmpxchg.end
  // partword.cmpxchg.end:
  //     %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
  //     %FinalOldVal = trunc i32 %tmp1 to i8
  //     %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
  //     %Res = insertvalue { i8, i1 } %25, i1 %Success, 1

  Value *Addr = CI->getPointerOperand();
  Value *Cmp = CI->getCompareOperand();
  Value *NewVal = CI->getNewValOperand();

  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  ReplacementIRBuilder Builder(CI, *DL);
  LLVMContext &Ctx = Builder.getContext();

  BasicBlock *EndBB =
      BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
  auto FailureBB =
      BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
  auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);

  // The split call above "helpfully" added a branch at the end of BB
  // (to the wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
                       CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // Shift the incoming values over, into the right location in the word.
  Value *NewVal_Shifted =
      Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
  Value *Cmp_Shifted =
      Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);

  // Load the entire current word, and mask into place the expected and new
  // values
  LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
  InitLoaded->setVolatile(CI->isVolatile());
  Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
  Builder.CreateBr(LoopBB);

  // partword.cmpxchg.loop:
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
  Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);

  // Mask/Or the expected and new values into place in the loaded word.
  Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
  Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
  AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
      PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
      CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID());
  NewCI->setVolatile(CI->isVolatile());
  // When we're building a strong cmpxchg, we need a loop, so you
  // might think we could use a weak cmpxchg inside. But, using strong
  // allows the below comparison for ShouldContinue, and we're
  // expecting the underlying cmpxchg to be a machine instruction,
  // which is strong anyways.
  NewCI->setWeak(CI->isWeak());

  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
  Value *Success = Builder.CreateExtractValue(NewCI, 1);

  if (CI->isWeak())
    Builder.CreateBr(EndBB);
  else
    Builder.CreateCondBr(Success, EndBB, FailureBB);

  // partword.cmpxchg.failure:
  Builder.SetInsertPoint(FailureBB);
  // Upon failure, verify that the masked-out part of the loaded value
  // has been modified. If it didn't, abort the cmpxchg, since the
  // masked-in part must've.
  Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
  Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
  Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);

  // Add the second value to the phi from above
  Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);

  // partword.cmpxchg.end:
  Builder.SetInsertPoint(CI);

  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
  Value *Res = PoisonValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
  Res = Builder.CreateInsertValue(Res, Success, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return true;
}

void AtomicExpand::expandAtomicOpToLLSC(
    Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
  ReplacementIRBuilder Builder(I, *DL);
  Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
                                    MemOpOrder, PerformOp);

  I->replaceAllUsesWith(Loaded);
  I->eraseFromParent();
}

void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
  ReplacementIRBuilder Builder(AI, *DL);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // The value operand must be sign-extended for signed min/max so that the
  // target's signed comparison instructions can be used. Otherwise, just
  // zero-ext.
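  //
  // For example (illustrative): for an i8 "min" with operand -1 (0xff), the
  // value must be sign-extended to 0xffffffff in the word type so that the
  // target's signed word-sized compare still sees -1 rather than 255.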
  Instruction::CastOps CastOp = Instruction::ZExt;
  AtomicRMWInst::BinOp RMWOp = AI->getOperation();
  if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
    CastOp = Instruction::SExt;

  Value *ValOperand_Shifted = Builder.CreateShl(
      Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
      PMV.ShiftAmt, "ValOperand_Shifted");
  Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
      Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
      AI->getOrdering());
  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
}

void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
  ReplacementIRBuilder Builder(CI, *DL);

  PartwordMaskValues PMV = createMaskInstrs(
      Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
      CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  Value *CmpVal_Shifted = Builder.CreateShl(
      Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
      "CmpVal_Shifted");
  Value *NewVal_Shifted = Builder.CreateShl(
      Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
      "NewVal_Shifted");
  Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
      Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
      CI->getMergedOrdering());
  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
  Value *Res = PoisonValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
  Value *Success = Builder.CreateICmpEQ(
      CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
  Res = Builder.CreateInsertValue(Res, Success, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
}

Value *AtomicExpand::insertRMWLLSCLoop(
    IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  assert(AddrAlign >=
             F->getParent()->getDataLayout().getTypeStoreSize(ResultTy) &&
         "Expected at least natural alignment at this point.");

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  // atomicrmw.start:
  //     %loaded = @load.linked(%addr)
  //     %new = some_op iN %loaded, %incr
  //     %stored = @store_conditional(%new, %addr)
  //     %try_again = icmp i32 ne %stored, 0
  //     br i1 %try_again, label %loop, label %atomicrmw.end
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);

  Value *NewVal = PerformOp(Builder, Loaded);

  Value *StoreSuccess =
      TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
  Value *TryAgain = Builder.CreateICmpNE(
      StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return Loaded;
}

/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
/// IR. As a migration step, we convert back to what used to be the standard
/// way to represent a pointer cmpxchg so that we can update backends one by
/// one.
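///
/// For example (illustrative),
///   cmpxchg ptr %p, ptr %old, ptr %new seq_cst seq_cst
/// becomes a cmpxchg on iN (N = pointer width in bits), with ptrtoint applied
/// to %old and %new and inttoptr applied to the loaded result.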
AtomicCmpXchgInst *
AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
  auto *M = CI->getModule();
  Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
                                            M->getDataLayout());

  ReplacementIRBuilder Builder(CI, *DL);

  Value *Addr = CI->getPointerOperand();

  Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
  Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);

  auto *NewCI = Builder.CreateAtomicCmpXchg(
      Addr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
      CI->getFailureOrdering(), CI->getSyncScopeID());
  NewCI->setVolatile(CI->isVolatile());
  NewCI->setWeak(CI->isWeak());
  LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");

  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
  Value *Succ = Builder.CreateExtractValue(NewCI, 1);

  OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());

  Value *Res = PoisonValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, OldVal, 0);
  Res = Builder.CreateInsertValue(Res, Succ, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return NewCI;
}

bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
  AtomicOrdering FailureOrder = CI->getFailureOrdering();
  Value *Addr = CI->getPointerOperand();
  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();
  // If shouldInsertFencesForAtomic() returns true, then the target does not
  // want to deal with memory orders, and emitLeading/TrailingFence should take
  // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
  // should preserve the ordering.
  bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
  AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
                                  ? AtomicOrdering::Monotonic
                                  : CI->getMergedOrdering();

  // In implementations which use a barrier to achieve release semantics, we
  // can delay emitting this barrier until we know a store is actually going
  // to be attempted. The cost of this delay is that we need 2 copies of the
  // block emitting the load-linked, affecting code size.
  //
  // Ideally, this logic would be unconditional except for the minsize check
  // since in other cases the extra blocks naturally collapse down to the
  // minimal loop. Unfortunately, this puts too much stress on later
  // optimisations so we avoid emitting the extra logic in those cases too.
  bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
                           SuccessOrder != AtomicOrdering::Monotonic &&
                           SuccessOrder != AtomicOrdering::Acquire &&
                           !F->hasMinSize();

  // There's no overhead for sinking the release barrier in a weak cmpxchg, so
  // do it even on minsize.
  bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();

  // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
  //
  // The full expansion we produce is:
  //     [...]
  //     %aligned.addr = ...
  // cmpxchg.start:
  //     %unreleasedload = @load.linked(%aligned.addr)
  //     %unreleasedload.extract = extract value from %unreleasedload
  //     %should_store = icmp eq %unreleasedload.extract, %desired
  //     br i1 %should_store, label %cmpxchg.releasingstore,
  //                          label %cmpxchg.nostore
  // cmpxchg.releasingstore:
  //     fence?
  //     br label cmpxchg.trystore
  // cmpxchg.trystore:
  //     %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
  //                            [%releasedload, %cmpxchg.releasedload]
  //     %updated.new = insert %new into %loaded.trystore
  //     %stored = @store_conditional(%updated.new, %aligned.addr)
  //     %success = icmp eq i32 %stored, 0
  //     br i1 %success, label %cmpxchg.success,
  //                     label %cmpxchg.releasedload/%cmpxchg.failure
  // cmpxchg.releasedload:
  //     %releasedload = @load.linked(%aligned.addr)
  //     %releasedload.extract = extract value from %releasedload
  //     %should_store = icmp eq %releasedload.extract, %desired
  //     br i1 %should_store, label %cmpxchg.trystore,
  //                          label %cmpxchg.failure
  // cmpxchg.success:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.nostore:
  //     %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
  //                           [%releasedload,
  //                               %cmpxchg.releasedload/%cmpxchg.trystore]
  //     @load_linked_fail_balance()?
  //     br label %cmpxchg.failure
  // cmpxchg.failure:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.end:
  //     %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
  //                        [%loaded.trystore, %cmpxchg.trystore]
  //     %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
  //     %loaded = extract value from %loaded.exit
  //     %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
  //     %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
  //     [...]
  BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
  auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
  auto ReleasedLoadBB =
      BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
  auto TryStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
  auto ReleasingStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
  auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);

  ReplacementIRBuilder Builder(CI, *DL);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we might want a fence too. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
                       CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
  Builder.CreateBr(StartBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(StartBB);
  Value *UnreleasedLoad =
      TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
  Value *UnreleasedLoadExtract =
      extractMaskedValue(Builder, UnreleasedLoad, PMV);
  Value *ShouldStore = Builder.CreateICmpEQ(
      UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");

  // If the cmpxchg doesn't actually need any ordering when it fails, we can
  // jump straight past that fence instruction (if it exists).
  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);

  Builder.SetInsertPoint(ReleasingStoreBB);
  if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(TryStoreBB);

  Builder.SetInsertPoint(TryStoreBB);
  PHINode *LoadedTryStore =
      Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
  LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
  Value *NewValueInsert =
      insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
  Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
                                                  PMV.AlignedAddr, MemOpOrder);
  StoreSuccess = Builder.CreateICmpEQ(
      StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
  BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
  Builder.CreateCondBr(StoreSuccess, SuccessBB,
                       CI->isWeak() ? FailureBB : RetryBB);

  Builder.SetInsertPoint(ReleasedLoadBB);
  Value *SecondLoad;
  if (HasReleasedLoadBB) {
    SecondLoad =
        TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
    Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
    ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
                                       CI->getCompareOperand(), "should_store");

    // If the cmpxchg doesn't actually need any ordering when it fails, we can
    // jump straight past that fence instruction (if it exists).
    Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
    // Update PHI node in TryStoreBB.
    LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
  } else
    Builder.CreateUnreachable();

  // Make sure later instructions don't get reordered with a fence if
  // necessary.
  Builder.SetInsertPoint(SuccessBB);
  if (ShouldInsertFencesForAtomic ||
      TLI->shouldInsertTrailingFenceForAtomicStore(CI))
    TLI->emitTrailingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(ExitBB);

  Builder.SetInsertPoint(NoStoreBB);
  PHINode *LoadedNoStore =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
  LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
  if (HasReleasedLoadBB)
    LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);

  // In the failing case, where we don't execute the store-conditional, the
  // target might want to balance out the load-linked with a dedicated
  // instruction (e.g., on ARM, clearing the exclusive monitor).
  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
  Builder.CreateBr(FailureBB);

  Builder.SetInsertPoint(FailureBB);
  PHINode *LoadedFailure =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
  LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
  if (CI->isWeak())
    LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
  if (ShouldInsertFencesForAtomic)
    TLI->emitTrailingFence(Builder, CI, FailureOrder);
  Builder.CreateBr(ExitBB);

  // Finally, we have control-flow based knowledge of whether the cmpxchg
  // succeeded or not. We expose this to later passes by converting any
  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
  // PHI.
  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  PHINode *LoadedExit =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
  LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
  LoadedExit->addIncoming(LoadedFailure, FailureBB);
  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);

  // This is the "exit value" from the cmpxchg expansion. It may be of
  // a type wider than the one in the cmpxchg instruction.
  Value *LoadedFull = LoadedExit;

  Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
  Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);

  // Look for any users of the cmpxchg that are just comparing the loaded value
  // against the desired one, and replace them with the CFG-derived version.
  SmallVector<ExtractValueInst *, 2> PrunedInsts;
  for (auto *User : CI->users()) {
    ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
    if (!EV)
      continue;

    assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
           "weird extraction from { iN, i1 }");

    if (EV->getIndices()[0] == 0)
      EV->replaceAllUsesWith(Loaded);
    else
      EV->replaceAllUsesWith(Success);

    PrunedInsts.push_back(EV);
  }

  // We can remove the instructions now we're no longer iterating through them.
  for (auto *EV : PrunedInsts)
    EV->eraseFromParent();

  if (!CI->use_empty()) {
    // Some use of the full struct return that we don't understand has happened,
    // so we've got to reconstruct it properly.
    Value *Res;
    Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0);
    Res = Builder.CreateInsertValue(Res, Success, 1);

    CI->replaceAllUsesWith(Res);
  }

  CI->eraseFromParent();
  return true;
}

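/// An atomicrmw is "idempotent" when it never changes the value in memory,
/// for example "atomicrmw add ptr %p, i32 0" or "atomicrmw and ptr %p, i32 -1"
/// (illustrative). Such operations can sometimes be replaced by a fence plus
/// an ordinary atomic load; see simplifyIdempotentRMW below.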
bool AtomicExpand::isIdempotentRMW(AtomicRMWInst *RMWI) {
  auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
  if (!C)
    return false;

  AtomicRMWInst::BinOp Op = RMWI->getOperation();
  switch (Op) {
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
    return C->isZero();
  case AtomicRMWInst::And:
    return C->isMinusOne();
  // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
  default:
    return false;
  }
}

bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
  if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
    tryExpandAtomicLoad(ResultingLoad);
    return true;
  }
  return false;
}

1478Value *AtomicExpand::insertRMWCmpXchgLoop(
1479 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1480 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
1481 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
1482 CreateCmpXchgInstFun CreateCmpXchg) {
1483 LLVMContext &Ctx = Builder.getContext();
1484 BasicBlock *BB = Builder.GetInsertBlock();
1485 Function *F = BB->getParent();
1486
1487 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1488 //
1489 // The standard expansion we produce is:
1490 // [...]
1491 // %init_loaded = load atomic iN* %addr
1492 // br label %loop
1493 // loop:
1494 // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1495 // %new = some_op iN %loaded, %incr
1496 // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1497 // %new_loaded = extractvalue { iN, i1 } %pair, 0
1498 // %success = extractvalue { iN, i1 } %pair, 1
1499 // br i1 %success, label %atomicrmw.end, label %loop
1500 // atomicrmw.end:
1501 // [...]
1502 BasicBlock *ExitBB =
1503 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1504 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1505
1506 // The split call above "helpfully" added a branch at the end of BB (to the
1507 // wrong place), but we want a load. It's easiest to just remove
1508 // the branch entirely.
1509 std::prev(BB->end())->eraseFromParent();
1510 Builder.SetInsertPoint(BB);
1511 LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
1512 Builder.CreateBr(LoopBB);
1513
1514 // Start the main loop block now that we've taken care of the preliminaries.
1515 Builder.SetInsertPoint(LoopBB);
1516 PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
1517 Loaded->addIncoming(InitLoaded, BB);
1518
1519 Value *NewVal = PerformOp(Builder, Loaded);
1520
1521 Value *NewLoaded = nullptr;
1522 Value *Success = nullptr;
1523
1524 CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
1525 MemOpOrder == AtomicOrdering::Unordered
1526 ? AtomicOrdering::Monotonic
1527 : MemOpOrder,
1528 SSID, Success, NewLoaded);
1529 assert(Success && NewLoaded);
1530
1531 Loaded->addIncoming(NewLoaded, LoopBB);
1532
1533 Builder.CreateCondBr(Success, ExitBB, LoopBB);
1534
1535 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1536 return NewLoaded;
1537}
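// Editorial illustration (not part of the original source): for a concrete
// input such as
//
//   %old = atomicrmw add ptr %p, i32 1 seq_cst
//
// the loop built above instantiates PerformOp as an "add", roughly:
//
//   %init_loaded = load i32, ptr %p
//   br label %atomicrmw.start
// atomicrmw.start:
//   %loaded = phi i32 [ %init_loaded, %entry ], [ %new_loaded, %atomicrmw.start ]
//   %new = add i32 %loaded, 1
//   %pair = cmpxchg ptr %p, i32 %loaded, i32 %new seq_cst seq_cst
//   %new_loaded = extractvalue { i32, i1 } %pair, 0
//   %success = extractvalue { i32, i1 } %pair, 1
//   br i1 %success, label %atomicrmw.end, label %atomicrmw.start
//
// (The cmpxchg itself comes from the CreateCmpXchg callback, so the caller
// may further lower it; block and value names here are illustrative.)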
1538
1539bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1540 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1541 unsigned ValueSize = getAtomicOpSize(CI);
1542
1543 switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1544 default:
1545 llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
1546 case TargetLoweringBase::AtomicExpansionKind::None:
1547 if (ValueSize < MinCASSize)
1548 return expandPartwordCmpXchg(CI);
1549 return false;
1550 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
1551 return expandAtomicCmpXchg(CI);
1552 }
1553 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
1554 expandAtomicCmpXchgToMaskedIntrinsic(CI);
1555 return true;
1556 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
1557 return lowerAtomicCmpXchgInst(CI);
1558 }
1559}
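// Editorial illustration (not part of the original source): on a target whose
// minimum cmpxchg width is 32 bits (getMinCmpXchgSizeInBits() == 32, so
// MinCASSize == 4), an i8 or i16 cmpxchg that the target otherwise handles
// natively (AtomicExpansionKind::None) still goes through
// expandPartwordCmpXchg, which re-expresses it as a masked i32 cmpxchg on the
// containing aligned word.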
1560
1561// Note: This function is exposed externally by AtomicExpandUtils.h
1562 bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
1563 CreateCmpXchgInstFun CreateCmpXchg) {
1564 ReplacementIRBuilder Builder(AI, AI->getModule()->getDataLayout());
1565 Builder.setIsFPConstrained(
1566 AI->getFunction()->hasFnAttribute(Attribute::StrictFP));
1567
1568 // FIXME: If FP exceptions are observable, we should force them off for the
1569 // loop for the FP atomics.
1570 Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
1571 Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
1572 AI->getOrdering(), AI->getSyncScopeID(),
1573 [&](IRBuilderBase &Builder, Value *Loaded) {
1574 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
1575 AI->getValOperand());
1576 },
1577 CreateCmpXchg);
1578
1579 AI->replaceAllUsesWith(Loaded);
1580 AI->eraseFromParent();
1581 return true;
1582}
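// Editorial sketch (not part of the original source; the callback name is
// hypothetical): a caller of the externally exposed helper that simply wants
// every cmpxchg in the generated loop emitted as a plain cmpxchg instruction
// could pass a callback mirroring createCmpXchgInstFun in this file, roughly:
//
//   expandAtomicRMWToCmpXchg(AI, [](IRBuilderBase &Builder, Value *Addr,
//                                   Value *Loaded, Value *NewVal,
//                                   Align AddrAlign, AtomicOrdering MemOpOrder,
//                                   SyncScope::ID SSID, Value *&Success,
//                                   Value *&NewLoaded) {
//     Value *Pair = Builder.CreateAtomicCmpXchg(
//         Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
//         AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
//     Success = Builder.CreateExtractValue(Pair, 1, "success");
//     NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
//   });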
1583
1584// In order to use one of the sized library calls such as
1585// __atomic_fetch_add_4, the alignment must be sufficient, the size
1586// must be one of the potentially-specialized sizes, and the value
1587// type must actually exist in C on the target (otherwise, the
1588// function wouldn't actually be defined.)
1589static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
1590 const DataLayout &DL) {
1591 // TODO: "LargestSize" is an approximation for "largest type that
1592 // you can express in C". It seems to be the case that int128 is
1593 // supported on all 64-bit platforms, otherwise only up to 64-bit
1594 // integers are supported. If we get this wrong, then we'll try to
1595 // call a sized libcall that doesn't actually exist. There should
1596 // really be some more reliable way in LLVM of determining integer
1597 // sizes which are valid in the target's C ABI...
1598 unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1599 return Alignment >= Size &&
1600 (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1601 Size <= LargestSize;
1602}
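// Editorial illustration (not part of the original source): on a typical
// 64-bit target (getLargestLegalIntTypeSizeInBits() >= 64, so LargestSize is
// 16), the predicate behaves as follows:
//
//   canUseSizedAtomicCall(4,  Align(4),  DL) -> true  (__atomic_*_4 usable)
//   canUseSizedAtomicCall(4,  Align(2),  DL) -> false (under-aligned)
//   canUseSizedAtomicCall(3,  Align(4),  DL) -> false (no __atomic_*_3)
//   canUseSizedAtomicCall(16, Align(16), DL) -> true  (i128 assumed available)
//
// On targets whose largest legal integer is narrower than 64 bits,
// LargestSize is 8 and the 16-byte case is rejected.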
1603
1604void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) {
1605 static const RTLIB::Libcall Libcalls[6] = {
1606 RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1607 RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1608 unsigned Size = getAtomicOpSize(I);
1609
1610 bool expanded = expandAtomicOpToLibcall(
1611 I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
1612 I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1613 if (!expanded)
1614 report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load");
1615}
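// Editorial illustration (not part of the original source): for a naturally
// aligned 4-byte atomic load, the expansion above produces a sized call such
// as (5 being the C ABI encoding of seq_cst):
//
//   %v = load atomic i32, ptr %p seq_cst, align 4
//     -->
//   %v = call i32 @__atomic_load_4(ptr %p, i32 5)
//
// If the sized form is unusable (see canUseSizedAtomicCall), the generic
// "void __atomic_load(size_t, void *, void *, int)" form is emitted instead,
// with the result returned through a temporary alloca.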
1616
1617void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
1618 static const RTLIB::Libcall Libcalls[6] = {
1619 RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1620 RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1621 unsigned Size = getAtomicOpSize(I);
1622
1623 bool expanded = expandAtomicOpToLibcall(
1624 I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
1625 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1626 if (!expanded)
1627 report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store");
1628}
1629
1630void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
1631 static const RTLIB::Libcall Libcalls[6] = {
1632 RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1633 RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1634 RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1635 unsigned Size = getAtomicOpSize(I);
1636
1637 bool expanded = expandAtomicOpToLibcall(
1638 I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
1639 I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
1640 Libcalls);
1641 if (!expanded)
1642 report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS");
1643}
1644
1645 static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
1646 static const RTLIB::Libcall LibcallsXchg[6] = {
1647 RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1648 RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1649 RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1650 static const RTLIB::Libcall LibcallsAdd[6] = {
1651 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1652 RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1653 RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1654 static const RTLIB::Libcall LibcallsSub[6] = {
1655 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1656 RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1657 RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1658 static const RTLIB::Libcall LibcallsAnd[6] = {
1659 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1660 RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1661 RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1662 static const RTLIB::Libcall LibcallsOr[6] = {
1663 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1664 RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1665 RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1666 static const RTLIB::Libcall LibcallsXor[6] = {
1667 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1668 RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1669 RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1670 static const RTLIB::Libcall LibcallsNand[6] = {
1671 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1672 RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1673 RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1674
1675 switch (Op) {
1676 case AtomicRMWInst::BAD_BINOP:
1677 llvm_unreachable("Should not have BAD_BINOP.");
1678 case AtomicRMWInst::Xchg:
1679 return ArrayRef(LibcallsXchg);
1680 case AtomicRMWInst::Add:
1681 return ArrayRef(LibcallsAdd);
1682 case AtomicRMWInst::Sub:
1683 return ArrayRef(LibcallsSub);
1684 case AtomicRMWInst::And:
1685 return ArrayRef(LibcallsAnd);
1686 case AtomicRMWInst::Or:
1687 return ArrayRef(LibcallsOr);
1688 case AtomicRMWInst::Xor:
1689 return ArrayRef(LibcallsXor);
1690 case AtomicRMWInst::Nand:
1691 return ArrayRef(LibcallsNand);
1692 case AtomicRMWInst::Max:
1693 case AtomicRMWInst::Min:
1694 case AtomicRMWInst::UMax:
1695 case AtomicRMWInst::UMin:
1696 case AtomicRMWInst::FMax:
1697 case AtomicRMWInst::FMin:
1698 case AtomicRMWInst::FAdd:
1699 case AtomicRMWInst::FSub:
1700 case AtomicRMWInst::UIncWrap:
1701 case AtomicRMWInst::UDecWrap:
1702 // No atomic libcalls are available for max/min/umax/umin.
1703 return {};
1704 }
1705 llvm_unreachable("Unexpected AtomicRMW operation.");
1706}
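// Editorial illustration (not part of the original source): GetRMWLibcall
// only selects a table; expandAtomicOpToLibcall later indexes it by size.
// For example, GetRMWLibcall(AtomicRMWInst::Add) returns LibcallsAdd, whose
// 4-byte entry is ATOMIC_FETCH_ADD_4 (__atomic_fetch_add_4) and whose generic
// entry is UNKNOWN_LIBCALL, while GetRMWLibcall(AtomicRMWInst::Max) returns
// an empty ArrayRef, forcing the cmpxchg-loop fallback below.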
1707
1708void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
1709 ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
1710
1711 unsigned Size = getAtomicOpSize(I);
1712
1713 bool Success = false;
1714 if (!Libcalls.empty())
1715 Success = expandAtomicOpToLibcall(
1716 I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
1717 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1718
1719 // The expansion failed: either there were no libcalls at all for
1720 // the operation (min/max), or there were only size-specialized
1721 // libcalls (add/sub/etc) and we needed a generic. So, expand to a
1722 // CAS libcall, via a CAS loop, instead.
1723 if (!Success) {
1724 expandAtomicRMWToCmpXchg(
1725 I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
1726 Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
1727 SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) {
1728 // Create the CAS instruction normally...
1729 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
1730 Addr, Loaded, NewVal, Alignment, MemOpOrder,
1731 AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
1732 Success = Builder.CreateExtractValue(Pair, 1, "success");
1733 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
1734
1735 // ...and then expand the CAS into a libcall.
1736 expandAtomicCASToLibcall(Pair);
1737 });
1738 }
1739}
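// Editorial illustration (not part of the original source): an operation with
// no fetch-style libcall, e.g.
//
//   %old = atomicrmw max ptr %p, i32 %v seq_cst
//
// takes the fallback path: a cmpxchg loop is built whose new value is roughly
// an "icmp sgt" plus "select" of %loaded and %v, and each cmpxchg in that
// loop is then itself expanded into a __atomic_compare_exchange call.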
1740
1741// A helper routine for the above expandAtomic*ToLibcall functions.
1742//
1743// 'Libcalls' contains an array of enum values for the particular
1744// ATOMIC libcalls to be emitted. All of the other arguments besides
1745// 'I' are extracted from the Instruction subclass by the
1746// caller. Depending on the particular call, some will be null.
1747bool AtomicExpand::expandAtomicOpToLibcall(
1748 Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
1749 Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
1750 AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
1751 assert(Libcalls.size() == 6);
1752
1753 LLVMContext &Ctx = I->getContext();
1754 Module *M = I->getModule();
1755 const DataLayout &DL = M->getDataLayout();
1756 IRBuilder<> Builder(I);
1757 IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
1758
1759 bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
1760 Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
1761
1762 const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
1763
1764 // TODO: the "order" argument type is "int", not int32. So
1765 // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
1766 ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
1767 assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
1768 Constant *OrderingVal =
1769 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
1770 Constant *Ordering2Val = nullptr;
1771 if (CASExpected) {
1772 assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
1773 Ordering2Val =
1774 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
1775 }
1776 bool HasResult = I->getType() != Type::getVoidTy(Ctx);
1777
1778 RTLIB::Libcall RTLibType;
1779 if (UseSizedLibcall) {
1780 switch (Size) {
1781 case 1:
1782 RTLibType = Libcalls[1];
1783 break;
1784 case 2:
1785 RTLibType = Libcalls[2];
1786 break;
1787 case 4:
1788 RTLibType = Libcalls[3];
1789 break;
1790 case 8:
1791 RTLibType = Libcalls[4];
1792 break;
1793 case 16:
1794 RTLibType = Libcalls[5];
1795 break;
1796 }
1797 } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
1798 RTLibType = Libcalls[0];
1799 } else {
1800 // Can't use sized function, and there's no generic for this
1801 // operation, so give up.
1802 return false;
1803 }
1804
1805 if (!TLI->getLibcallName(RTLibType)) {
1806 // This target does not implement the requested atomic libcall so give up.
1807 return false;
1808 }
1809
1810 // Build up the function call. There are two kinds. First, the sized
1811 // variants. These calls are going to be one of the following (with
1812 // N=1,2,4,8,16):
1813 // iN __atomic_load_N(iN *ptr, int ordering)
1814 // void __atomic_store_N(iN *ptr, iN val, int ordering)
1815 // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
1816 // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
1817 // int success_order, int failure_order)
1818 //
1819 // Note that these functions can be used for non-integer atomic
1820 // operations, the values just need to be bitcast to integers on the
1821 // way in and out.
1822 //
1823 // And, then, the generic variants. They look like the following:
1824 // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
1825 // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
1826 // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
1827 // int ordering)
1828 // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
1829 // void *desired, int success_order,
1830 // int failure_order)
1831 //
1832 // The different signatures are built up depending on the
1833 // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
1834 // variables.
1835
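// Editorial illustration (not part of the original source; the value names
// below are illustrative): for an i32 cmpxchg that cannot use the sized form,
// the code that follows ends up emitting roughly (on a 64-bit target, with
// seq_cst/seq_cst orderings encoded as 5):
//
//   %expected.addr = alloca i32            ; created in the entry block
//   store i32 %expected, ptr %expected.addr
//   %ok = call i1 @__atomic_compare_exchange(i64 4, ptr %p, ptr %expected.addr,
//                                            ptr %desired.addr, i32 5, i32 5)
//   %out = load i32, ptr %expected.addr
//
// i.e. 'expected' travels through memory and is reloaded afterwards to
// reconstruct the { i32, i1 } result of the original instruction.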
1836 AllocaInst *AllocaCASExpected = nullptr;
1837 AllocaInst *AllocaValue = nullptr;
1838 AllocaInst *AllocaResult = nullptr;
1839
1840 Type *ResultTy;
1841 SmallVector<Value *, 6> Args;
1842 AttributeList Attr;
1843
1844 // 'size' argument.
1845 if (!UseSizedLibcall) {
1846 // Note, getIntPtrType is assumed equivalent to size_t.
1847 Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
1848 }
1849
1850 // 'ptr' argument.
1851 // note: This assumes all address spaces share a common libfunc
1852 // implementation and that addresses are convertible. For systems without
1853 // that property, we'd need to extend this mechanism to support AS-specific
1854 // families of atomic intrinsics.
1855 Value *PtrVal = PointerOperand;
1856 PtrVal = Builder.CreateAddrSpaceCast(PtrVal, PointerType::getUnqual(Ctx));
1857 Args.push_back(PtrVal);
1858
1859 // 'expected' argument, if present.
1860 if (CASExpected) {
1861 AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
1862 AllocaCASExpected->setAlignment(AllocaAlignment);
1863 Builder.CreateLifetimeStart(AllocaCASExpected, SizeVal64);
1864 Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
1865 Args.push_back(AllocaCASExpected);
1866 }
1867
1868 // 'val' argument ('desired' for cas), if present.
1869 if (ValueOperand) {
1870 if (UseSizedLibcall) {
1871 Value *IntValue =
1872 Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
1873 Args.push_back(IntValue);
1874 } else {
1875 AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
1876 AllocaValue->setAlignment(AllocaAlignment);
1877 Builder.CreateLifetimeStart(AllocaValue, SizeVal64);
1878 Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
1879 Args.push_back(AllocaValue);
1880 }
1881 }
1882
1883 // 'ret' argument.
1884 if (!CASExpected && HasResult && !UseSizedLibcall) {
1885 AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
1886 AllocaResult->setAlignment(AllocaAlignment);
1887 Builder.CreateLifetimeStart(AllocaResult, SizeVal64);
1888 Args.push_back(AllocaResult);
1889 }
1890
1891 // 'ordering' ('success_order' for cas) argument.
1892 Args.push_back(OrderingVal);
1893
1894 // 'failure_order' argument, if present.
1895 if (Ordering2Val)
1896 Args.push_back(Ordering2Val);
1897
1898 // Now, the return type.
1899 if (CASExpected) {
1900 ResultTy = Type::getInt1Ty(Ctx);
1901 Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
1902 } else if (HasResult && UseSizedLibcall)
1903 ResultTy = SizedIntTy;
1904 else
1905 ResultTy = Type::getVoidTy(Ctx);
1906
1907 // Done with setting up arguments and return types, create the call:
1908 SmallVector<Type *, 6> ArgTys;
1909 for (Value *Arg : Args)
1910 ArgTys.push_back(Arg->getType());
1911 FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
1912 FunctionCallee LibcallFn =
1913 M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
1914 CallInst *Call = Builder.CreateCall(LibcallFn, Args);
1915 Call->setAttributes(Attr);
1916 Value *Result = Call;
1917
1918 // And then, extract the results...
1919 if (ValueOperand && !UseSizedLibcall)
1920 Builder.CreateLifetimeEnd(AllocaValue, SizeVal64);
1921
1922 if (CASExpected) {
1923 // The final result from the CAS is {load of 'expected' alloca, bool result
1924 // from call}
1925 Type *FinalResultTy = I->getType();
1926 Value *V = PoisonValue::get(FinalResultTy);
1927 Value *ExpectedOut = Builder.CreateAlignedLoad(
1928 CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
1929 Builder.CreateLifetimeEnd(AllocaCASExpected, SizeVal64);
1930 V = Builder.CreateInsertValue(V, ExpectedOut, 0);
1931 V = Builder.CreateInsertValue(V, Result, 1);
1932 I->replaceAllUsesWith(V);
1933 } else if (HasResult) {
1934 Value *V;
1935 if (UseSizedLibcall)
1936 V = Builder.CreateBitOrPointerCast(Result, I->getType());
1937 else {
1938 V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
1939 AllocaAlignment);
1940 Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);
1941 }
1942 I->replaceAllUsesWith(V);
1943 }
1944 I->eraseFromParent();
1945 return true;
1946}