LLVM 17.0.0git
AtomicExpandPass.cpp
Go to the documentation of this file.
//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass (at IR level) to replace atomic instructions with
// __atomic_* library calls, or with target-specific instructions which
// implement the same semantics in a way which better fits the target backend.
// This can include the use of (intrinsic-based) load-linked/store-conditional
// loops, AtomicCmpXchg, or type coercions.
//
//===----------------------------------------------------------------------===//
16
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLFunctionalExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/InstSimplifyFolder.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/CodeGen/AtomicExpandUtils.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Transforms/Utils/LowerAtomic.h"
#include <cassert>
#include <cstdint>
#include <iterator>
55
56using namespace llvm;
57
58#define DEBUG_TYPE "atomic-expand"
59
60namespace {
61
62class AtomicExpand : public FunctionPass {
63 const TargetLowering *TLI = nullptr;
64 const DataLayout *DL = nullptr;
65
66public:
67 static char ID; // Pass identification, replacement for typeid
68
69 AtomicExpand() : FunctionPass(ID) {
71 }
72
73 bool runOnFunction(Function &F) override;
74
75private:
76 bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
77 IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
78 LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
79 bool tryExpandAtomicLoad(LoadInst *LI);
80 bool expandAtomicLoadToLL(LoadInst *LI);
81 bool expandAtomicLoadToCmpXchg(LoadInst *LI);
82 StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
83 bool tryExpandAtomicStore(StoreInst *SI);
84 void expandAtomicStore(StoreInst *SI);
85 bool tryExpandAtomicRMW(AtomicRMWInst *AI);
86 AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
87 Value *
88 insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
89 Align AddrAlign, AtomicOrdering MemOpOrder,
90 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
91 void expandAtomicOpToLLSC(
92 Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
93 AtomicOrdering MemOpOrder,
94 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
95 void expandPartwordAtomicRMW(
97 AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
98 bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
99 void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
100 void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
101
102 AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
103 static Value *insertRMWCmpXchgLoop(
104 IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
105 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
106 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
107 CreateCmpXchgInstFun CreateCmpXchg);
108 bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
109
110 bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
111 bool isIdempotentRMW(AtomicRMWInst *RMWI);
112 bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
113
114 bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
115 Value *PointerOperand, Value *ValueOperand,
116 Value *CASExpected, AtomicOrdering Ordering,
117 AtomicOrdering Ordering2,
118 ArrayRef<RTLIB::Libcall> Libcalls);
119 void expandAtomicLoadToLibcall(LoadInst *LI);
120 void expandAtomicStoreToLibcall(StoreInst *LI);
121 void expandAtomicRMWToLibcall(AtomicRMWInst *I);
122 void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
123
124 friend bool
126 CreateCmpXchgInstFun CreateCmpXchg);
127};
128
129// IRBuilder to be used for replacement atomic instructions.
130struct ReplacementIRBuilder : IRBuilder<InstSimplifyFolder> {
131 // Preserves the DebugLoc from I, and preserves still valid metadata.
132 explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
133 : IRBuilder(I->getContext(), DL) {
135 this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
136 }
137};
138
139} // end anonymous namespace
140
141char AtomicExpand::ID = 0;
142
143char &llvm::AtomicExpandID = AtomicExpand::ID;
144
145INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions", false,
146 false)
147
148FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); }
149
150// Helper functions to retrieve the size of atomic instructions.
151static unsigned getAtomicOpSize(LoadInst *LI) {
152 const DataLayout &DL = LI->getModule()->getDataLayout();
153 return DL.getTypeStoreSize(LI->getType());
154}
155
156static unsigned getAtomicOpSize(StoreInst *SI) {
157 const DataLayout &DL = SI->getModule()->getDataLayout();
158 return DL.getTypeStoreSize(SI->getValueOperand()->getType());
159}
160
161static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
162 const DataLayout &DL = RMWI->getModule()->getDataLayout();
163 return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
164}
165
166static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
167 const DataLayout &DL = CASI->getModule()->getDataLayout();
168 return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
169}
170
171// Determine if a particular atomic operation has a supported size,
172// and is of appropriate alignment, to be passed through for target
173// lowering. (Versus turning into a __atomic libcall)
174template <typename Inst>
175static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
176 unsigned Size = getAtomicOpSize(I);
177 Align Alignment = I->getAlign();
178 return Alignment >= Size &&
180}
181
182bool AtomicExpand::runOnFunction(Function &F) {
183 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
184 if (!TPC)
185 return false;
186
187 auto &TM = TPC->getTM<TargetMachine>();
188 const auto *Subtarget = TM.getSubtargetImpl(F);
189 if (!Subtarget->enableAtomicExpand())
190 return false;
191 TLI = Subtarget->getTargetLowering();
192 DL = &F.getParent()->getDataLayout();
193
195
196 // Changing control-flow while iterating through it is a bad idea, so gather a
197 // list of all atomic instructions before we start.
198 for (Instruction &I : instructions(F))
199 if (I.isAtomic() && !isa<FenceInst>(&I))
200 AtomicInsts.push_back(&I);
201
202 bool MadeChange = false;
203 for (auto *I : AtomicInsts) {
204 auto LI = dyn_cast<LoadInst>(I);
205 auto SI = dyn_cast<StoreInst>(I);
206 auto RMWI = dyn_cast<AtomicRMWInst>(I);
207 auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
208 assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");
209
210 // If the Size/Alignment is not supported, replace with a libcall.
211 if (LI) {
212 if (!atomicSizeSupported(TLI, LI)) {
213 expandAtomicLoadToLibcall(LI);
214 MadeChange = true;
215 continue;
216 }
217 } else if (SI) {
218 if (!atomicSizeSupported(TLI, SI)) {
219 expandAtomicStoreToLibcall(SI);
220 MadeChange = true;
221 continue;
222 }
223 } else if (RMWI) {
224 if (!atomicSizeSupported(TLI, RMWI)) {
225 expandAtomicRMWToLibcall(RMWI);
226 MadeChange = true;
227 continue;
228 }
229 } else if (CASI) {
230 if (!atomicSizeSupported(TLI, CASI)) {
231 expandAtomicCASToLibcall(CASI);
232 MadeChange = true;
233 continue;
234 }
235 }
236
237 if (LI && TLI->shouldCastAtomicLoadInIR(LI) ==
238 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
239 I = LI = convertAtomicLoadToIntegerType(LI);
240 MadeChange = true;
241 } else if (SI &&
242 TLI->shouldCastAtomicStoreInIR(SI) ==
243 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
244 I = SI = convertAtomicStoreToIntegerType(SI);
245 MadeChange = true;
246 } else if (RMWI &&
247 TLI->shouldCastAtomicRMWIInIR(RMWI) ==
248 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
249 I = RMWI = convertAtomicXchgToIntegerType(RMWI);
250 MadeChange = true;
251 } else if (CASI) {
252 // TODO: when we're ready to make the change at the IR level, we can
253 // extend convertCmpXchgToInteger for floating point too.
254 if (CASI->getCompareOperand()->getType()->isPointerTy()) {
255 // TODO: add a TLI hook to control this so that each target can
256 // convert to lowering the original type one at a time.
257 I = CASI = convertCmpXchgToIntegerType(CASI);
258 MadeChange = true;
259 }
260 }
261
262 if (TLI->shouldInsertFencesForAtomic(I)) {
263 auto FenceOrdering = AtomicOrdering::Monotonic;
264 if (LI && isAcquireOrStronger(LI->getOrdering())) {
265 FenceOrdering = LI->getOrdering();
266 LI->setOrdering(AtomicOrdering::Monotonic);
267 } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
268 FenceOrdering = SI->getOrdering();
269 SI->setOrdering(AtomicOrdering::Monotonic);
270 } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
271 isAcquireOrStronger(RMWI->getOrdering()))) {
272 FenceOrdering = RMWI->getOrdering();
273 RMWI->setOrdering(AtomicOrdering::Monotonic);
274 } else if (CASI &&
275 TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
276 TargetLoweringBase::AtomicExpansionKind::None &&
277 (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
278 isAcquireOrStronger(CASI->getSuccessOrdering()) ||
279 isAcquireOrStronger(CASI->getFailureOrdering()))) {
280 // If a compare and swap is lowered to LL/SC, we can do smarter fence
281 // insertion, with a stronger one on the success path than on the
282 // failure path. As a result, fence insertion is directly done by
283 // expandAtomicCmpXchg in that case.
284 FenceOrdering = CASI->getMergedOrdering();
285 CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
286 CASI->setFailureOrdering(AtomicOrdering::Monotonic);
287 }
288
289 if (FenceOrdering != AtomicOrdering::Monotonic) {
290 MadeChange |= bracketInstWithFences(I, FenceOrdering);
291 }
292 } else if (I->hasAtomicStore() &&
293 TLI->shouldInsertTrailingFenceForAtomicStore(I)) {
294 auto FenceOrdering = AtomicOrdering::Monotonic;
295 if (SI)
296 FenceOrdering = SI->getOrdering();
297 else if (RMWI)
298 FenceOrdering = RMWI->getOrdering();
299 else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) !=
300 TargetLoweringBase::AtomicExpansionKind::LLSC)
301 // LLSC is handled in expandAtomicCmpXchg().
302 FenceOrdering = CASI->getSuccessOrdering();
303
305 if (auto TrailingFence =
306 TLI->emitTrailingFence(Builder, I, FenceOrdering)) {
307 TrailingFence->moveAfter(I);
308 MadeChange = true;
309 }
310 }
311
312 if (LI)
313 MadeChange |= tryExpandAtomicLoad(LI);
314 else if (SI)
315 MadeChange |= tryExpandAtomicStore(SI);
316 else if (RMWI) {
317 // There are two different ways of expanding RMW instructions:
318 // - into a load if it is idempotent
319 // - into a Cmpxchg/LL-SC loop otherwise
320 // we try them in that order.
321
322 if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
323 MadeChange = true;
324 } else {
325 AtomicRMWInst::BinOp Op = RMWI->getOperation();
326 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
327 unsigned ValueSize = getAtomicOpSize(RMWI);
328 if (ValueSize < MinCASSize &&
329 (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
330 Op == AtomicRMWInst::And)) {
331 RMWI = widenPartwordAtomicRMW(RMWI);
332 MadeChange = true;
333 }
334
335 MadeChange |= tryExpandAtomicRMW(RMWI);
336 }
337 } else if (CASI)
338 MadeChange |= tryExpandAtomicCmpXchg(CASI);
339 }
340 return MadeChange;
341}
342
343bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) {
344 ReplacementIRBuilder Builder(I, *DL);
345
346 auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
347
348 auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
349 // We have a guard here because not every atomic operation generates a
350 // trailing fence.
351 if (TrailingFence)
352 TrailingFence->moveAfter(I);
353
354 return (LeadingFence || TrailingFence);
355}
356
357/// Get the iX type with the same bitwidth as T.
358IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
359 const DataLayout &DL) {
360 EVT VT = TLI->getMemValueType(DL, T);
361 unsigned BitWidth = VT.getStoreSizeInBits();
362 assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
363 return IntegerType::get(T->getContext(), BitWidth);
364}
365
366/// Convert an atomic load of a non-integral type to an integer load of the
367/// equivalent bitwidth. See the function comment on
368/// convertAtomicStoreToIntegerType for background.
369LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
370 auto *M = LI->getModule();
371 Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());
372
373 ReplacementIRBuilder Builder(LI, *DL);
374
376 Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace());
377 Value *NewAddr = Builder.CreateBitCast(Addr, PT);
378
379 auto *NewLI = Builder.CreateLoad(NewTy, NewAddr);
380 NewLI->setAlignment(LI->getAlign());
381 NewLI->setVolatile(LI->isVolatile());
382 NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
383 LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
384
385 Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
386 LI->replaceAllUsesWith(NewVal);
387 LI->eraseFromParent();
388 return NewLI;
389}
390
392AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
393 auto *M = RMWI->getModule();
394 Type *NewTy =
395 getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());
396
397 ReplacementIRBuilder Builder(RMWI, *DL);
398
399 Value *Addr = RMWI->getPointerOperand();
400 Value *Val = RMWI->getValOperand();
401 Type *PT = PointerType::get(NewTy, RMWI->getPointerAddressSpace());
402 Value *NewAddr = Builder.CreateBitCast(Addr, PT);
403 Value *NewVal = Val->getType()->isPointerTy()
404 ? Builder.CreatePtrToInt(Val, NewTy)
405 : Builder.CreateBitCast(Val, NewTy);
406
407 auto *NewRMWI =
408 Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, NewAddr, NewVal,
409 RMWI->getAlign(), RMWI->getOrdering());
410 NewRMWI->setVolatile(RMWI->isVolatile());
411 LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
412
413 Value *NewRVal = RMWI->getType()->isPointerTy()
414 ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
415 : Builder.CreateBitCast(NewRMWI, RMWI->getType());
416 RMWI->replaceAllUsesWith(NewRVal);
417 RMWI->eraseFromParent();
418 return NewRMWI;
419}
420
421bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
422 switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
423 case TargetLoweringBase::AtomicExpansionKind::None:
424 return false;
425 case TargetLoweringBase::AtomicExpansionKind::LLSC:
426 expandAtomicOpToLLSC(
427 LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
428 LI->getOrdering(),
429 [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
430 return true;
431 case TargetLoweringBase::AtomicExpansionKind::LLOnly:
432 return expandAtomicLoadToLL(LI);
433 case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
434 return expandAtomicLoadToCmpXchg(LI);
435 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
436 LI->setAtomic(AtomicOrdering::NotAtomic);
437 return true;
438 default:
439 llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
440 }
441}
442
443bool AtomicExpand::tryExpandAtomicStore(StoreInst *SI) {
444 switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
445 case TargetLoweringBase::AtomicExpansionKind::None:
446 return false;
447 case TargetLoweringBase::AtomicExpansionKind::Expand:
448 expandAtomicStore(SI);
449 return true;
450 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
451 SI->setAtomic(AtomicOrdering::NotAtomic);
452 return true;
453 default:
454 llvm_unreachable("Unhandled case in tryExpandAtomicStore");
455 }
456}
457
458bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
459 ReplacementIRBuilder Builder(LI, *DL);
460
461 // On some architectures, load-linked instructions are atomic for larger
462 // sizes than normal loads. For example, the only 64-bit load guaranteed
463 // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
464 Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
465 LI->getPointerOperand(), LI->getOrdering());
466 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
467
468 LI->replaceAllUsesWith(Val);
469 LI->eraseFromParent();
470
471 return true;
472}
473
474bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
475 ReplacementIRBuilder Builder(LI, *DL);
476 AtomicOrdering Order = LI->getOrdering();
477 if (Order == AtomicOrdering::Unordered)
478 Order = AtomicOrdering::Monotonic;
479
481 Type *Ty = LI->getType();
482 Constant *DummyVal = Constant::getNullValue(Ty);
483
484 Value *Pair = Builder.CreateAtomicCmpXchg(
485 Addr, DummyVal, DummyVal, LI->getAlign(), Order,
487 Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
488
489 LI->replaceAllUsesWith(Loaded);
490 LI->eraseFromParent();
491
492 return true;
493}
494
495/// Convert an atomic store of a non-integral type to an integer store of the
496/// equivalent bitwidth. We used to not support floating point or vector
497/// atomics in the IR at all. The backends learned to deal with the bitcast
498/// idiom because that was the only way of expressing the notion of a atomic
499/// float or vector store. The long term plan is to teach each backend to
500/// instruction select from the original atomic store, but as a migration
501/// mechanism, we convert back to the old format which the backends understand.
502/// Each backend will need individual work to recognize the new format.
503StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
504 ReplacementIRBuilder Builder(SI, *DL);
505 auto *M = SI->getModule();
506 Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
507 M->getDataLayout());
508 Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
509
510 Value *Addr = SI->getPointerOperand();
511 Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace());
512 Value *NewAddr = Builder.CreateBitCast(Addr, PT);
513
514 StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
515 NewSI->setAlignment(SI->getAlign());
516 NewSI->setVolatile(SI->isVolatile());
517 NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
518 LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
519 SI->eraseFromParent();
520 return NewSI;
521}
522
523void AtomicExpand::expandAtomicStore(StoreInst *SI) {
524 // This function is only called on atomic stores that are too large to be
525 // atomic if implemented as a native store. So we replace them by an
526 // atomic swap, that can be implemented for example as a ldrex/strex on ARM
527 // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
528 // It is the responsibility of the target to only signal expansion via
529 // shouldExpandAtomicRMW in cases where this is required and possible.
530 ReplacementIRBuilder Builder(SI, *DL);
531 AtomicOrdering Ordering = SI->getOrdering();
532 assert(Ordering != AtomicOrdering::NotAtomic);
533 AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
534 ? AtomicOrdering::Monotonic
535 : Ordering;
536 AtomicRMWInst *AI = Builder.CreateAtomicRMW(
537 AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
538 SI->getAlign(), RMWOrdering);
539 SI->eraseFromParent();
540
541 // Now we have an appropriate swap instruction, lower it as usual.
542 tryExpandAtomicRMW(AI);
543}
544
546 Value *Loaded, Value *NewVal, Align AddrAlign,
547 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
548 Value *&Success, Value *&NewLoaded) {
549 Type *OrigTy = NewVal->getType();
550
551 // This code can go away when cmpxchg supports FP types.
552 assert(!OrigTy->isPointerTy());
553 bool NeedBitcast = OrigTy->isFloatingPointTy();
554 if (NeedBitcast) {
555 IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
556 unsigned AS = Addr->getType()->getPointerAddressSpace();
557 Addr = Builder.CreateBitCast(Addr, IntTy->getPointerTo(AS));
558 NewVal = Builder.CreateBitCast(NewVal, IntTy);
559 Loaded = Builder.CreateBitCast(Loaded, IntTy);
560 }
561
562 Value *Pair = Builder.CreateAtomicCmpXchg(
563 Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
565 Success = Builder.CreateExtractValue(Pair, 1, "success");
566 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
567
568 if (NeedBitcast)
569 NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
570}
571
572bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
573 LLVMContext &Ctx = AI->getModule()->getContext();
574 TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
575 switch (Kind) {
576 case TargetLoweringBase::AtomicExpansionKind::None:
577 return false;
578 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
579 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
580 unsigned ValueSize = getAtomicOpSize(AI);
581 if (ValueSize < MinCASSize) {
582 expandPartwordAtomicRMW(AI,
583 TargetLoweringBase::AtomicExpansionKind::LLSC);
584 } else {
585 auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
586 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
587 AI->getValOperand());
588 };
589 expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
590 AI->getAlign(), AI->getOrdering(), PerformOp);
591 }
592 return true;
593 }
594 case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
595 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
596 unsigned ValueSize = getAtomicOpSize(AI);
597 if (ValueSize < MinCASSize) {
598 expandPartwordAtomicRMW(AI,
599 TargetLoweringBase::AtomicExpansionKind::CmpXChg);
600 } else {
602 Ctx.getSyncScopeNames(SSNs);
603 auto MemScope = SSNs[AI->getSyncScopeID()].empty()
604 ? "system"
605 : SSNs[AI->getSyncScopeID()];
607 ORE.emit([&]() {
608 return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
609 << "A compare and swap loop was generated for an atomic "
610 << AI->getOperationName(AI->getOperation()) << " operation at "
611 << MemScope << " memory scope";
612 });
614 }
615 return true;
616 }
617 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
618 expandAtomicRMWToMaskedIntrinsic(AI);
619 return true;
620 }
621 case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
622 TLI->emitBitTestAtomicRMWIntrinsic(AI);
623 return true;
624 }
625 case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: {
626 TLI->emitCmpArithAtomicRMWIntrinsic(AI);
627 return true;
628 }
629 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
630 return lowerAtomicRMWInst(AI);
631 case TargetLoweringBase::AtomicExpansionKind::Expand:
632 TLI->emitExpandAtomicRMW(AI);
633 return true;
634 default:
635 llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
636 }
637}
638
639namespace {
640
641struct PartwordMaskValues {
642 // These three fields are guaranteed to be set by createMaskInstrs.
643 Type *WordType = nullptr;
644 Type *ValueType = nullptr;
645 Type *IntValueType = nullptr;
646 Value *AlignedAddr = nullptr;
647 Align AlignedAddrAlignment;
648 // The remaining fields can be null.
649 Value *ShiftAmt = nullptr;
650 Value *Mask = nullptr;
651 Value *Inv_Mask = nullptr;
652};
653
655raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
656 auto PrintObj = [&O](auto *V) {
657 if (V)
658 O << *V;
659 else
660 O << "nullptr";
661 O << '\n';
662 };
663 O << "PartwordMaskValues {\n";
664 O << " WordType: ";
665 PrintObj(PMV.WordType);
666 O << " ValueType: ";
667 PrintObj(PMV.ValueType);
668 O << " AlignedAddr: ";
669 PrintObj(PMV.AlignedAddr);
670 O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
671 O << " ShiftAmt: ";
672 PrintObj(PMV.ShiftAmt);
673 O << " Mask: ";
674 PrintObj(PMV.Mask);
675 O << " Inv_Mask: ";
676 PrintObj(PMV.Inv_Mask);
677 O << "}\n";
678 return O;
679}
680
681} // end anonymous namespace
682
683/// This is a helper function which builds instructions to provide
684/// values necessary for partword atomic operations. It takes an
685/// incoming address, Addr, and ValueType, and constructs the address,
686/// shift-amounts and masks needed to work with a larger value of size
687/// WordSize.
688///
689/// AlignedAddr: Addr rounded down to a multiple of WordSize
690///
691/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
692/// from AlignAddr for it to have the same value as if
693/// ValueType was loaded from Addr.
694///
695/// Mask: Value to mask with the value loaded from AlignAddr to
696/// include only the part that would've been loaded from Addr.
697///
698/// Inv_Mask: The inverse of Mask.
699static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
701 Value *Addr, Align AddrAlign,
702 unsigned MinWordSize) {
703 PartwordMaskValues PMV;
704
705 Module *M = I->getModule();
706 LLVMContext &Ctx = M->getContext();
707 const DataLayout &DL = M->getDataLayout();
708 unsigned ValueSize = DL.getTypeStoreSize(ValueType);
709
710 PMV.ValueType = PMV.IntValueType = ValueType;
711 if (PMV.ValueType->isFloatingPointTy())
712 PMV.IntValueType =
713 Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());
714
715 PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
716 : ValueType;
717 if (PMV.ValueType == PMV.WordType) {
718 PMV.AlignedAddr = Addr;
719 PMV.AlignedAddrAlignment = AddrAlign;
720 PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
721 PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
722 return PMV;
723 }
724
725 PMV.AlignedAddrAlignment = Align(MinWordSize);
726
727 assert(ValueSize < MinWordSize);
728
729 PointerType *PtrTy = cast<PointerType>(Addr->getType());
730 Type *WordPtrType = PMV.WordType->getPointerTo(PtrTy->getAddressSpace());
731 IntegerType *IntTy = DL.getIntPtrType(Ctx, PtrTy->getAddressSpace());
732 Value *PtrLSB;
733
734 if (AddrAlign < MinWordSize) {
735 PMV.AlignedAddr = Builder.CreateIntrinsic(
736 Intrinsic::ptrmask, {PtrTy, IntTy},
737 {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
738 "AlignedAddr");
739
740 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
741 PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
742 } else {
743 // If the alignment is high enough, the LSB are known 0.
744 PMV.AlignedAddr = Addr;
745 PtrLSB = ConstantInt::getNullValue(IntTy);
746 }
747
748 if (DL.isLittleEndian()) {
749 // turn bytes into bits
750 PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
751 } else {
752 // turn bytes into bits, and count from the other side.
753 PMV.ShiftAmt = Builder.CreateShl(
754 Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
755 }
756
757 PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
758 PMV.Mask = Builder.CreateShl(
759 ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
760 "Mask");
761
762 PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
763
764 // Cast for typed pointers.
765 PMV.AlignedAddr =
766 Builder.CreateBitCast(PMV.AlignedAddr, WordPtrType, "AlignedAddr");
767
768 return PMV;
769}
770
771static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
772 const PartwordMaskValues &PMV) {
773 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
774 if (PMV.WordType == PMV.ValueType)
775 return WideWord;
776
777 Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
778 Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
779 return Builder.CreateBitCast(Trunc, PMV.ValueType);
780}
781
782static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
783 Value *Updated, const PartwordMaskValues &PMV) {
784 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
785 assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
786 if (PMV.WordType == PMV.ValueType)
787 return Updated;
788
789 Updated = Builder.CreateBitCast(Updated, PMV.IntValueType);
790
791 Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
792 Value *Shift =
793 Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
794 Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
795 Value *Or = Builder.CreateOr(And, Shift, "inserted");
796 return Or;
797}
798
799/// Emit IR to implement a masked version of a given atomicrmw
800/// operation. (That is, only the bits under the Mask should be
801/// affected by the operation)
803 IRBuilderBase &Builder, Value *Loaded,
804 Value *Shifted_Inc, Value *Inc,
805 const PartwordMaskValues &PMV) {
806 // TODO: update to use
807 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
808 // to merge bits from two values without requiring PMV.Inv_Mask.
809 switch (Op) {
810 case AtomicRMWInst::Xchg: {
811 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
812 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
813 return FinalVal;
814 }
818 llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
821 case AtomicRMWInst::Nand: {
822 // The other arithmetic ops need to be masked into place.
823 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
824 Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
825 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
826 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
827 return FinalVal;
828 }
839 // Finally, other ops will operate on the full value, so truncate down to
840 // the original size, and expand out again after doing the
841 // operation. Bitcasts will be inserted for FP values.
842 Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
843 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
844 Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
845 return FinalVal;
846 }
847 default:
848 llvm_unreachable("Unknown atomic op");
849 }
850}
851
852/// Expand a sub-word atomicrmw operation into an appropriate
853/// word-sized operation.
854///
855/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
856/// way as a typical atomicrmw expansion. The only difference here is
857/// that the operation inside of the loop may operate upon only a
858/// part of the value.
859void AtomicExpand::expandPartwordAtomicRMW(
861 AtomicOrdering MemOpOrder = AI->getOrdering();
862 SyncScope::ID SSID = AI->getSyncScopeID();
863
864 ReplacementIRBuilder Builder(AI, *DL);
865
866 PartwordMaskValues PMV =
867 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
868 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
869
870 Value *ValOperand_Shifted = nullptr;
871 if (AI->getOperation() == AtomicRMWInst::Xchg ||
875 ValOperand_Shifted =
876 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
877 PMV.ShiftAmt, "ValOperand_Shifted");
878 }
879
880 auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
881 return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded,
882 ValOperand_Shifted, AI->getValOperand(), PMV);
883 };
884
885 Value *OldResult;
886 if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
887 OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
888 PMV.AlignedAddrAlignment, MemOpOrder, SSID,
889 PerformPartwordOp, createCmpXchgInstFun);
890 } else {
891 assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
892 OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
893 PMV.AlignedAddrAlignment, MemOpOrder,
894 PerformPartwordOp);
895 }
896
897 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
898 AI->replaceAllUsesWith(FinalOldResult);
899 AI->eraseFromParent();
900}
901
902// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
903AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
904 ReplacementIRBuilder Builder(AI, *DL);
906
908 Op == AtomicRMWInst::And) &&
909 "Unable to widen operation");
910
911 PartwordMaskValues PMV =
912 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
913 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
914
915 Value *ValOperand_Shifted =
916 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
917 PMV.ShiftAmt, "ValOperand_Shifted");
918
919 Value *NewOperand;
920
921 if (Op == AtomicRMWInst::And)
922 NewOperand =
923 Builder.CreateOr(PMV.Inv_Mask, ValOperand_Shifted, "AndOperand");
924 else
925 NewOperand = ValOperand_Shifted;
926
927 AtomicRMWInst *NewAI =
928 Builder.CreateAtomicRMW(Op, PMV.AlignedAddr, NewOperand,
929 PMV.AlignedAddrAlignment, AI->getOrdering());
930
931 Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
932 AI->replaceAllUsesWith(FinalOldResult);
933 AI->eraseFromParent();
934 return NewAI;
935}
936
937bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
938 // The basic idea here is that we're expanding a cmpxchg of a
939 // smaller memory size up to a word-sized cmpxchg. To do this, we
940 // need to add a retry-loop for strong cmpxchg, so that
941 // modifications to other parts of the word don't cause a spurious
942 // failure.
943
944 // This generates code like the following:
945 // [[Setup mask values PMV.*]]
946 // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
947 // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
948 // %InitLoaded = load i32* %addr
949 // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
950 // br partword.cmpxchg.loop
951 // partword.cmpxchg.loop:
952 // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
953 // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
954 // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
955 // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
956 // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
957 // i32 %FullWord_NewVal success_ordering failure_ordering
958 // %OldVal = extractvalue { i32, i1 } %NewCI, 0
959 // %Success = extractvalue { i32, i1 } %NewCI, 1
960 // br i1 %Success, label %partword.cmpxchg.end,
961 // label %partword.cmpxchg.failure
962 // partword.cmpxchg.failure:
963 // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
964 // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
965 // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
966 // label %partword.cmpxchg.end
967 // partword.cmpxchg.end:
968 // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
969 // %FinalOldVal = trunc i32 %tmp1 to i8
970 // %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
971 // %Res = insertvalue { i8, i1 } %25, i1 %Success, 1
972
974 Value *Cmp = CI->getCompareOperand();
975 Value *NewVal = CI->getNewValOperand();
976
977 BasicBlock *BB = CI->getParent();
978 Function *F = BB->getParent();
979 ReplacementIRBuilder Builder(CI, *DL);
980 LLVMContext &Ctx = Builder.getContext();
981
982 BasicBlock *EndBB =
983 BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
984 auto FailureBB =
985 BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
986 auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
987
988 // The split call above "helpfully" added a branch at the end of BB
989 // (to the wrong place).
990 std::prev(BB->end())->eraseFromParent();
991 Builder.SetInsertPoint(BB);
992
993 PartwordMaskValues PMV =
994 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
995 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
996
997 // Shift the incoming values over, into the right location in the word.
998 Value *NewVal_Shifted =
999 Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
1000 Value *Cmp_Shifted =
1001 Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
1002
1003 // Load the entire current word, and mask into place the expected and new
1004 // values
1005 LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
1006 InitLoaded->setVolatile(CI->isVolatile());
1007 Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
1008 Builder.CreateBr(LoopBB);
1009
1010 // partword.cmpxchg.loop:
1011 Builder.SetInsertPoint(LoopBB);
1012 PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
1013 Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
1014
1015 // Mask/Or the expected and new values into place in the loaded word.
1016 Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
1017 Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
1018 AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
1019 PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
1021 NewCI->setVolatile(CI->isVolatile());
1022 // When we're building a strong cmpxchg, we need a loop, so you
1023 // might think we could use a weak cmpxchg inside. But, using strong
1024 // allows the below comparison for ShouldContinue, and we're
1025 // expecting the underlying cmpxchg to be a machine instruction,
1026 // which is strong anyways.
1027 NewCI->setWeak(CI->isWeak());
1028
1029 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1030 Value *Success = Builder.CreateExtractValue(NewCI, 1);
1031
1032 if (CI->isWeak())
1033 Builder.CreateBr(EndBB);
1034 else
1035 Builder.CreateCondBr(Success, EndBB, FailureBB);
1036
1037 // partword.cmpxchg.failure:
1038 Builder.SetInsertPoint(FailureBB);
1039 // Upon failure, verify that the masked-out part of the loaded value
1040 // has been modified. If it didn't, abort the cmpxchg, since the
1041 // masked-in part must've.
1042 Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
1043 Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
1044 Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
1045
1046 // Add the second value to the phi from above
1047 Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
1048
1049 // partword.cmpxchg.end:
1050 Builder.SetInsertPoint(CI);
1051
1052 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1053 Value *Res = PoisonValue::get(CI->getType());
1054 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1055 Res = Builder.CreateInsertValue(Res, Success, 1);
1056
1057 CI->replaceAllUsesWith(Res);
1058 CI->eraseFromParent();
1059 return true;
1060}
1061
1062void AtomicExpand::expandAtomicOpToLLSC(
1063 Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
1064 AtomicOrdering MemOpOrder,
1065 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1066 ReplacementIRBuilder Builder(I, *DL);
1067 Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
1068 MemOpOrder, PerformOp);
1069
1070 I->replaceAllUsesWith(Loaded);
1071 I->eraseFromParent();
1072}
1073
1074void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
1075 ReplacementIRBuilder Builder(AI, *DL);
1076
1077 PartwordMaskValues PMV =
1078 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1079 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1080
1081 // The value operand must be sign-extended for signed min/max so that the
1082 // target's signed comparison instructions can be used. Otherwise, just
1083 // zero-ext.
1084 Instruction::CastOps CastOp = Instruction::ZExt;
1085 AtomicRMWInst::BinOp RMWOp = AI->getOperation();
1086 if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
1087 CastOp = Instruction::SExt;
1088
1089 Value *ValOperand_Shifted = Builder.CreateShl(
1090 Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
1091 PMV.ShiftAmt, "ValOperand_Shifted");
1092 Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
1093 Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
1094 AI->getOrdering());
1095 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1096 AI->replaceAllUsesWith(FinalOldResult);
1097 AI->eraseFromParent();
1098}
1099
1100void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
1101 ReplacementIRBuilder Builder(CI, *DL);
1102
1103 PartwordMaskValues PMV = createMaskInstrs(
1104 Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
1105 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1106
1107 Value *CmpVal_Shifted = Builder.CreateShl(
1108 Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
1109 "CmpVal_Shifted");
1110 Value *NewVal_Shifted = Builder.CreateShl(
1111 Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
1112 "NewVal_Shifted");
1113 Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
1114 Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
1115 CI->getMergedOrdering());
1116 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1117 Value *Res = PoisonValue::get(CI->getType());
1118 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1119 Value *Success = Builder.CreateICmpEQ(
1120 CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
1121 Res = Builder.CreateInsertValue(Res, Success, 1);
1122
1123 CI->replaceAllUsesWith(Res);
1124 CI->eraseFromParent();
1125}
1126
1127Value *AtomicExpand::insertRMWLLSCLoop(
1128 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1129 AtomicOrdering MemOpOrder,
1130 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1131 LLVMContext &Ctx = Builder.getContext();
1132 BasicBlock *BB = Builder.GetInsertBlock();
1133 Function *F = BB->getParent();
1134
1135 assert(AddrAlign >=
1136 F->getParent()->getDataLayout().getTypeStoreSize(ResultTy) &&
1137 "Expected at least natural alignment at this point.");
1138
1139 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1140 //
1141 // The standard expansion we produce is:
1142 // [...]
1143 // atomicrmw.start:
1144 // %loaded = @load.linked(%addr)
1145 // %new = some_op iN %loaded, %incr
1146 // %stored = @store_conditional(%new, %addr)
1147 // %try_again = icmp i32 ne %stored, 0
1148 // br i1 %try_again, label %loop, label %atomicrmw.end
1149 // atomicrmw.end:
1150 // [...]
1151 BasicBlock *ExitBB =
1152 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1153 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1154
1155 // The split call above "helpfully" added a branch at the end of BB (to the
1156 // wrong place).
1157 std::prev(BB->end())->eraseFromParent();
1158 Builder.SetInsertPoint(BB);
1159 Builder.CreateBr(LoopBB);
1160
1161 // Start the main loop block now that we've taken care of the preliminaries.
1162 Builder.SetInsertPoint(LoopBB);
1163 Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);
1164
1165 Value *NewVal = PerformOp(Builder, Loaded);
1166
1167 Value *StoreSuccess =
1168 TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
1169 Value *TryAgain = Builder.CreateICmpNE(
1170 StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
1171 Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
1172
1173 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1174 return Loaded;
1175}
1176
1177/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
1178/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
1179/// IR. As a migration step, we convert back to what use to be the standard
1180/// way to represent a pointer cmpxchg so that we can update backends one by
1181/// one.
1183AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1184 auto *M = CI->getModule();
1185 Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
1186 M->getDataLayout());
1187
1188 ReplacementIRBuilder Builder(CI, *DL);
1189
1190 Value *Addr = CI->getPointerOperand();
1191 Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace());
1192 Value *NewAddr = Builder.CreateBitCast(Addr, PT);
1193
1194 Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
1195 Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
1196
1197 auto *NewCI = Builder.CreateAtomicCmpXchg(
1198 NewAddr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
1199 CI->getFailureOrdering(), CI->getSyncScopeID());
1200 NewCI->setVolatile(CI->isVolatile());
1201 NewCI->setWeak(CI->isWeak());
1202 LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1203
1204 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1205 Value *Succ = Builder.CreateExtractValue(NewCI, 1);
1206
1207 OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
1208
1209 Value *Res = PoisonValue::get(CI->getType());
1210 Res = Builder.CreateInsertValue(Res, OldVal, 0);
1211 Res = Builder.CreateInsertValue(Res, Succ, 1);
1212
1213 CI->replaceAllUsesWith(Res);
1214 CI->eraseFromParent();
1215 return NewCI;
1216}
1217
1218bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1219 AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
1220 AtomicOrdering FailureOrder = CI->getFailureOrdering();
1221 Value *Addr = CI->getPointerOperand();
1222 BasicBlock *BB = CI->getParent();
1223 Function *F = BB->getParent();
1224 LLVMContext &Ctx = F->getContext();
1225 // If shouldInsertFencesForAtomic() returns true, then the target does not
1226 // want to deal with memory orders, and emitLeading/TrailingFence should take
1227 // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
1228 // should preserve the ordering.
1229 bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
1230 AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
1231 ? AtomicOrdering::Monotonic
1232 : CI->getMergedOrdering();
1233
1234 // In implementations which use a barrier to achieve release semantics, we can
1235 // delay emitting this barrier until we know a store is actually going to be
1236 // attempted. The cost of this delay is that we need 2 copies of the block
1237 // emitting the load-linked, affecting code size.
1238 //
1239 // Ideally, this logic would be unconditional except for the minsize check
1240 // since in other cases the extra blocks naturally collapse down to the
1241 // minimal loop. Unfortunately, this puts too much stress on later
1242 // optimisations so we avoid emitting the extra logic in those cases too.
1243 bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
1244 SuccessOrder != AtomicOrdering::Monotonic &&
1245 SuccessOrder != AtomicOrdering::Acquire &&
1246 !F->hasMinSize();
1247
1248 // There's no overhead for sinking the release barrier in a weak cmpxchg, so
1249 // do it even on minsize.
1250 bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();
1251
1252 // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
1253 //
1254 // The full expansion we produce is:
1255 // [...]
1256 // %aligned.addr = ...
1257 // cmpxchg.start:
1258 // %unreleasedload = @load.linked(%aligned.addr)
1259 // %unreleasedload.extract = extract value from %unreleasedload
1260 // %should_store = icmp eq %unreleasedload.extract, %desired
1261 // br i1 %should_store, label %cmpxchg.releasingstore,
1262 // label %cmpxchg.nostore
1263 // cmpxchg.releasingstore:
1264 // fence?
1265 // br label cmpxchg.trystore
1266 // cmpxchg.trystore:
1267 // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
1268 // [%releasedload, %cmpxchg.releasedload]
1269 // %updated.new = insert %new into %loaded.trystore
1270 // %stored = @store_conditional(%updated.new, %aligned.addr)
1271 // %success = icmp eq i32 %stored, 0
1272 // br i1 %success, label %cmpxchg.success,
1273 // label %cmpxchg.releasedload/%cmpxchg.failure
1274 // cmpxchg.releasedload:
1275 // %releasedload = @load.linked(%aligned.addr)
1276 // %releasedload.extract = extract value from %releasedload
1277 // %should_store = icmp eq %releasedload.extract, %desired
1278 // br i1 %should_store, label %cmpxchg.trystore,
1279 // label %cmpxchg.failure
1280 // cmpxchg.success:
1281 // fence?
1282 // br label %cmpxchg.end
1283 // cmpxchg.nostore:
1284 // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
1285 // [%releasedload,
1286 // %cmpxchg.releasedload/%cmpxchg.trystore]
1287 // @load_linked_fail_balance()?
1288 // br label %cmpxchg.failure
1289 // cmpxchg.failure:
1290 // fence?
1291 // br label %cmpxchg.end
1292 // cmpxchg.end:
1293 // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
1294 // [%loaded.trystore, %cmpxchg.trystore]
1295 // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
1296 // %loaded = extract value from %loaded.exit
1297 // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
1298 // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
1299 // [...]
1300 BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
1301 auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
1302 auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
1303 auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
1304 auto ReleasedLoadBB =
1305 BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
1306 auto TryStoreBB =
1307 BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
1308 auto ReleasingStoreBB =
1309 BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
1310 auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
1311
1312 ReplacementIRBuilder Builder(CI, *DL);
1313
1314 // The split call above "helpfully" added a branch at the end of BB (to the
1315 // wrong place), but we might want a fence too. It's easiest to just remove
1316 // the branch entirely.
1317 std::prev(BB->end())->eraseFromParent();
1318 Builder.SetInsertPoint(BB);
1319 if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1320 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1321
1322 PartwordMaskValues PMV =
1323 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1324 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1325 Builder.CreateBr(StartBB);
1326
1327 // Start the main loop block now that we've taken care of the preliminaries.
1328 Builder.SetInsertPoint(StartBB);
1329 Value *UnreleasedLoad =
1330 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1331 Value *UnreleasedLoadExtract =
1332 extractMaskedValue(Builder, UnreleasedLoad, PMV);
1333 Value *ShouldStore = Builder.CreateICmpEQ(
1334 UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");
1335
1336 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1337 // jump straight past that fence instruction (if it exists).
1338 Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);
1339
1340 Builder.SetInsertPoint(ReleasingStoreBB);
1341 if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1342 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1343 Builder.CreateBr(TryStoreBB);
1344
1345 Builder.SetInsertPoint(TryStoreBB);
1346 PHINode *LoadedTryStore =
1347 Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
1348 LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
1349 Value *NewValueInsert =
1350 insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
1351 Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
1352 PMV.AlignedAddr, MemOpOrder);
1353 StoreSuccess = Builder.CreateICmpEQ(
1354 StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
1355 BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1356 Builder.CreateCondBr(StoreSuccess, SuccessBB,
1357 CI->isWeak() ? FailureBB : RetryBB);
1358
1359 Builder.SetInsertPoint(ReleasedLoadBB);
1360 Value *SecondLoad;
1361 if (HasReleasedLoadBB) {
1362 SecondLoad =
1363 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1364 Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
1365 ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
1366 CI->getCompareOperand(), "should_store");
1367
1368 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1369 // jump straight past that fence instruction (if it exists).
1370 Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
1371 // Update PHI node in TryStoreBB.
1372 LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
1373 } else
1374 Builder.CreateUnreachable();
1375
1376 // Make sure later instructions don't get reordered with a fence if
1377 // necessary.
1378 Builder.SetInsertPoint(SuccessBB);
1379 if (ShouldInsertFencesForAtomic ||
1380 TLI->shouldInsertTrailingFenceForAtomicStore(CI))
1381 TLI->emitTrailingFence(Builder, CI, SuccessOrder);
1382 Builder.CreateBr(ExitBB);
1383
1384 Builder.SetInsertPoint(NoStoreBB);
1385 PHINode *LoadedNoStore =
1386 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
1387 LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
1388 if (HasReleasedLoadBB)
1389 LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);
1390
1391 // In the failing case, where we don't execute the store-conditional, the
1392 // target might want to balance out the load-linked with a dedicated
1393 // instruction (e.g., on ARM, clearing the exclusive monitor).
1394 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
1395 Builder.CreateBr(FailureBB);
1396
1397 Builder.SetInsertPoint(FailureBB);
1398 PHINode *LoadedFailure =
1399 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
1400 LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
1401 if (CI->isWeak())
1402 LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
1403 if (ShouldInsertFencesForAtomic)
1404 TLI->emitTrailingFence(Builder, CI, FailureOrder);
1405 Builder.CreateBr(ExitBB);
1406
1407 // Finally, we have control-flow based knowledge of whether the cmpxchg
1408 // succeeded or not. We expose this to later passes by converting any
1409 // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
1410 // PHI.
1411 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1412 PHINode *LoadedExit =
1413 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
1414 LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
1415 LoadedExit->addIncoming(LoadedFailure, FailureBB);
1416 PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
1417 Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
1418 Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
1419
1420 // This is the "exit value" from the cmpxchg expansion. It may be of
1421 // a type wider than the one in the cmpxchg instruction.
1422 Value *LoadedFull = LoadedExit;
1423
1424 Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
1425 Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);
1426
1427 // Look for any users of the cmpxchg that are just comparing the loaded value
1428 // against the desired one, and replace them with the CFG-derived version.
1430 for (auto *User : CI->users()) {
1431 ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
1432 if (!EV)
1433 continue;
1434
1435 assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
1436 "weird extraction from { iN, i1 }");
1437
1438 if (EV->getIndices()[0] == 0)
1439 EV->replaceAllUsesWith(Loaded);
1440 else
1442
1443 PrunedInsts.push_back(EV);
1444 }
1445
1446 // We can remove the instructions now we're no longer iterating through them.
1447 for (auto *EV : PrunedInsts)
1448 EV->eraseFromParent();
1449
1450 if (!CI->use_empty()) {
1451 // Some use of the full struct return that we don't understand has happened,
1452 // so we've got to reconstruct it properly.
1453 Value *Res;
1454 Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0);
1455 Res = Builder.CreateInsertValue(Res, Success, 1);
1456
1457 CI->replaceAllUsesWith(Res);
1458 }
1459
1460 CI->eraseFromParent();
1461 return true;
1462}
1463
1464bool AtomicExpand::isIdempotentRMW(AtomicRMWInst *RMWI) {
1465 auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
1466 if (!C)
1467 return false;
1468
1470 switch (Op) {
1471 case AtomicRMWInst::Add:
1472 case AtomicRMWInst::Sub:
1473 case AtomicRMWInst::Or:
1474 case AtomicRMWInst::Xor:
1475 return C->isZero();
1476 case AtomicRMWInst::And:
1477 return C->isMinusOne();
1478 // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
1479 default:
1480 return false;
1481 }
1482}
1483
1484bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
1485 if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1486 tryExpandAtomicLoad(ResultingLoad);
1487 return true;
1488 }
1489 return false;
1490}
1491
1492Value *AtomicExpand::insertRMWCmpXchgLoop(
1493 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1494 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
1495 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
1496 CreateCmpXchgInstFun CreateCmpXchg) {
1497 LLVMContext &Ctx = Builder.getContext();
1498 BasicBlock *BB = Builder.GetInsertBlock();
1499 Function *F = BB->getParent();
1500
1501 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1502 //
1503 // The standard expansion we produce is:
1504 // [...]
1505 // %init_loaded = load atomic iN* %addr
1506 // br label %loop
1507 // loop:
1508 // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1509 // %new = some_op iN %loaded, %incr
1510 // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1511 // %new_loaded = extractvalue { iN, i1 } %pair, 0
1512 // %success = extractvalue { iN, i1 } %pair, 1
1513 // br i1 %success, label %atomicrmw.end, label %loop
1514 // atomicrmw.end:
1515 // [...]
1516 BasicBlock *ExitBB =
1517 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1518 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1519
1520 // The split call above "helpfully" added a branch at the end of BB (to the
1521 // wrong place), but we want a load. It's easiest to just remove
1522 // the branch entirely.
1523 std::prev(BB->end())->eraseFromParent();
1524 Builder.SetInsertPoint(BB);
1525 LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
1526 Builder.CreateBr(LoopBB);
1527
1528 // Start the main loop block now that we've taken care of the preliminaries.
1529 Builder.SetInsertPoint(LoopBB);
1530 PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
1531 Loaded->addIncoming(InitLoaded, BB);
1532
1533 Value *NewVal = PerformOp(Builder, Loaded);
1534
1535 Value *NewLoaded = nullptr;
1536 Value *Success = nullptr;
1537
1538 CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
1539 MemOpOrder == AtomicOrdering::Unordered
1540 ? AtomicOrdering::Monotonic
1541 : MemOpOrder,
1542 SSID, Success, NewLoaded);
1543 assert(Success && NewLoaded);
1544
1545 Loaded->addIncoming(NewLoaded, LoopBB);
1546
1547 Builder.CreateCondBr(Success, ExitBB, LoopBB);
1548
1549 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1550 return NewLoaded;
1551}
1552
1553bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1554 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1555 unsigned ValueSize = getAtomicOpSize(CI);
1556
1557 switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1558 default:
1559 llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
1560 case TargetLoweringBase::AtomicExpansionKind::None:
1561 if (ValueSize < MinCASSize)
1562 return expandPartwordCmpXchg(CI);
1563 return false;
1564 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
1565 return expandAtomicCmpXchg(CI);
1566 }
1567 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
1568 expandAtomicCmpXchgToMaskedIntrinsic(CI);
1569 return true;
1570 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
1571 return lowerAtomicCmpXchgInst(CI);
1572 }
1573}
1574
1575// Note: This function is exposed externally by AtomicExpandUtils.h
1577 CreateCmpXchgInstFun CreateCmpXchg) {
1578 ReplacementIRBuilder Builder(AI, AI->getModule()->getDataLayout());
1579 Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
1580 Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
1581 AI->getOrdering(), AI->getSyncScopeID(),
1582 [&](IRBuilderBase &Builder, Value *Loaded) {
1583 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
1584 AI->getValOperand());
1585 },
1586 CreateCmpXchg);
1587
1588 AI->replaceAllUsesWith(Loaded);
1589 AI->eraseFromParent();
1590 return true;
1591}
1592
1593// In order to use one of the sized library calls such as
1594// __atomic_fetch_add_4, the alignment must be sufficient, the size
1595// must be one of the potentially-specialized sizes, and the value
1596// type must actually exist in C on the target (otherwise, the
1597// function wouldn't actually be defined.)
1598static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
1599 const DataLayout &DL) {
1600 // TODO: "LargestSize" is an approximation for "largest type that
1601 // you can express in C". It seems to be the case that int128 is
1602 // supported on all 64-bit platforms, otherwise only up to 64-bit
1603 // integers are supported. If we get this wrong, then we'll try to
1604 // call a sized libcall that doesn't actually exist. There should
1605 // really be some more reliable way in LLVM of determining integer
1606 // sizes which are valid in the target's C ABI...
1607 unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1608 return Alignment >= Size &&
1609 (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1610 Size <= LargestSize;
1611}
1612
1613void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) {
1614 static const RTLIB::Libcall Libcalls[6] = {
1615 RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1616 RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1617 unsigned Size = getAtomicOpSize(I);
1618
1619 bool expanded = expandAtomicOpToLibcall(
1620 I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
1621 I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1622 if (!expanded)
1623 report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load");
1624}
1625
1626void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
1627 static const RTLIB::Libcall Libcalls[6] = {
1628 RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1629 RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1630 unsigned Size = getAtomicOpSize(I);
1631
1632 bool expanded = expandAtomicOpToLibcall(
1633 I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
1634 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1635 if (!expanded)
1636 report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store");
1637}
1638
1639void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
1640 static const RTLIB::Libcall Libcalls[6] = {
1641 RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1642 RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1643 RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1644 unsigned Size = getAtomicOpSize(I);
1645
1646 bool expanded = expandAtomicOpToLibcall(
1647 I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
1648 I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
1649 Libcalls);
1650 if (!expanded)
1651 report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS");
1652}
1653
1655 static const RTLIB::Libcall LibcallsXchg[6] = {
1656 RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1657 RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1658 RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1659 static const RTLIB::Libcall LibcallsAdd[6] = {
1660 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1661 RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1662 RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1663 static const RTLIB::Libcall LibcallsSub[6] = {
1664 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1665 RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1666 RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1667 static const RTLIB::Libcall LibcallsAnd[6] = {
1668 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1669 RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1670 RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1671 static const RTLIB::Libcall LibcallsOr[6] = {
1672 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1673 RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1674 RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1675 static const RTLIB::Libcall LibcallsXor[6] = {
1676 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1677 RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1678 RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1679 static const RTLIB::Libcall LibcallsNand[6] = {
1680 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1681 RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1682 RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1683
1684 switch (Op) {
1686 llvm_unreachable("Should not have BAD_BINOP.");
1688 return ArrayRef(LibcallsXchg);
1689 case AtomicRMWInst::Add:
1690 return ArrayRef(LibcallsAdd);
1691 case AtomicRMWInst::Sub:
1692 return ArrayRef(LibcallsSub);
1693 case AtomicRMWInst::And:
1694 return ArrayRef(LibcallsAnd);
1695 case AtomicRMWInst::Or:
1696 return ArrayRef(LibcallsOr);
1697 case AtomicRMWInst::Xor:
1698 return ArrayRef(LibcallsXor);
1700 return ArrayRef(LibcallsNand);
1701 case AtomicRMWInst::Max:
1702 case AtomicRMWInst::Min:
1711 // No atomic libcalls are available for max/min/umax/umin.
1712 return {};
1713 }
1714 llvm_unreachable("Unexpected AtomicRMW operation.");
1715}
1716
1717void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
1718 ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
1719
1720 unsigned Size = getAtomicOpSize(I);
1721
1722 bool Success = false;
1723 if (!Libcalls.empty())
1724 Success = expandAtomicOpToLibcall(
1725 I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
1726 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1727
1728 // The expansion failed: either there were no libcalls at all for
1729 // the operation (min/max), or there were only size-specialized
1730 // libcalls (add/sub/etc) and we needed a generic. So, expand to a
1731 // CAS libcall, via a CAS loop, instead.
1732 if (!Success) {
1734 I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
1735 Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
1736 SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) {
1737 // Create the CAS instruction normally...
1738 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
1739 Addr, Loaded, NewVal, Alignment, MemOpOrder,
1741 Success = Builder.CreateExtractValue(Pair, 1, "success");
1742 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
1743
1744 // ...and then expand the CAS into a libcall.
1745 expandAtomicCASToLibcall(Pair);
1746 });
1747 }
1748}
1749
1750// A helper routine for the above expandAtomic*ToLibcall functions.
1751//
1752// 'Libcalls' contains an array of enum values for the particular
1753// ATOMIC libcalls to be emitted. All of the other arguments besides
1754// 'I' are extracted from the Instruction subclass by the
1755// caller. Depending on the particular call, some will be null.
1756bool AtomicExpand::expandAtomicOpToLibcall(
1757 Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
1758 Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
1759 AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
1760 assert(Libcalls.size() == 6);
1761
1762 LLVMContext &Ctx = I->getContext();
1763 Module *M = I->getModule();
1764 const DataLayout &DL = M->getDataLayout();
1766 IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
1767
1768 bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
1769 Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
1770
1771 const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
1772
1773 // TODO: the "order" argument type is "int", not int32. So
1774 // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
1776 assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
1777 Constant *OrderingVal =
1778 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
1779 Constant *Ordering2Val = nullptr;
1780 if (CASExpected) {
1781 assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
1782 Ordering2Val =
1783 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
1784 }
1785 bool HasResult = I->getType() != Type::getVoidTy(Ctx);
1786
1787 RTLIB::Libcall RTLibType;
1788 if (UseSizedLibcall) {
1789 switch (Size) {
1790 case 1:
1791 RTLibType = Libcalls[1];
1792 break;
1793 case 2:
1794 RTLibType = Libcalls[2];
1795 break;
1796 case 4:
1797 RTLibType = Libcalls[3];
1798 break;
1799 case 8:
1800 RTLibType = Libcalls[4];
1801 break;
1802 case 16:
1803 RTLibType = Libcalls[5];
1804 break;
1805 }
1806 } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
1807 RTLibType = Libcalls[0];
1808 } else {
1809 // Can't use sized function, and there's no generic for this
1810 // operation, so give up.
1811 return false;
1812 }
1813
1814 if (!TLI->getLibcallName(RTLibType)) {
1815 // This target does not implement the requested atomic libcall so give up.
1816 return false;
1817 }
1818
1819 // Build up the function call. There's two kinds. First, the sized
1820 // variants. These calls are going to be one of the following (with
1821 // N=1,2,4,8,16):
1822 // iN __atomic_load_N(iN *ptr, int ordering)
1823 // void __atomic_store_N(iN *ptr, iN val, int ordering)
1824 // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
1825 // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
1826 // int success_order, int failure_order)
1827 //
1828 // Note that these functions can be used for non-integer atomic
1829 // operations, the values just need to be bitcast to integers on the
1830 // way in and out.
1831 //
1832 // And, then, the generic variants. They look like the following:
1833 // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
1834 // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
1835 // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
1836 // int ordering)
1837 // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
1838 // void *desired, int success_order,
1839 // int failure_order)
1840 //
1841 // The different signatures are built up depending on the
1842 // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
1843 // variables.
1844
1845 AllocaInst *AllocaCASExpected = nullptr;
1846 Value *AllocaCASExpected_i8 = nullptr;
1847 AllocaInst *AllocaValue = nullptr;
1848 Value *AllocaValue_i8 = nullptr;
1849 AllocaInst *AllocaResult = nullptr;
1850 Value *AllocaResult_i8 = nullptr;
1851
1852 Type *ResultTy;
1854 AttributeList Attr;
1855
1856 // 'size' argument.
1857 if (!UseSizedLibcall) {
1858 // Note, getIntPtrType is assumed equivalent to size_t.
1859 Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
1860 }
1861
1862 // 'ptr' argument.
1863 // note: This assumes all address spaces share a common libfunc
1864 // implementation and that addresses are convertable. For systems without
1865 // that property, we'd need to extend this mechanism to support AS-specific
1866 // families of atomic intrinsics.
1867 auto PtrTypeAS = PointerOperand->getType()->getPointerAddressSpace();
1868 Value *PtrVal =
1869 Builder.CreateBitCast(PointerOperand, Type::getInt8PtrTy(Ctx, PtrTypeAS));
1870 PtrVal = Builder.CreateAddrSpaceCast(PtrVal, Type::getInt8PtrTy(Ctx));
1871 Args.push_back(PtrVal);
1872
1873 // 'expected' argument, if present.
1874 if (CASExpected) {
1875 AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
1876 AllocaCASExpected->setAlignment(AllocaAlignment);
1877 unsigned AllocaAS = AllocaCASExpected->getType()->getPointerAddressSpace();
1878
1879 AllocaCASExpected_i8 = Builder.CreateBitCast(
1880 AllocaCASExpected, Type::getInt8PtrTy(Ctx, AllocaAS));
1881 Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64);
1882 Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
1883 Args.push_back(AllocaCASExpected_i8);
1884 }
1885
1886 // 'val' argument ('desired' for cas), if present.
1887 if (ValueOperand) {
1888 if (UseSizedLibcall) {
1889 Value *IntValue =
1890 Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
1891 Args.push_back(IntValue);
1892 } else {
1893 AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
1894 AllocaValue->setAlignment(AllocaAlignment);
1895 AllocaValue_i8 =
1896 Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx));
1897 Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64);
1898 Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
1899 Args.push_back(AllocaValue_i8);
1900 }
1901 }
1902
1903 // 'ret' argument.
1904 if (!CASExpected && HasResult && !UseSizedLibcall) {
1905 AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
1906 AllocaResult->setAlignment(AllocaAlignment);
1907 unsigned AllocaAS = AllocaResult->getType()->getPointerAddressSpace();
1908 AllocaResult_i8 =
1909 Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx, AllocaAS));
1910 Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64);
1911 Args.push_back(AllocaResult_i8);
1912 }
1913
1914 // 'ordering' ('success_order' for cas) argument.
1915 Args.push_back(OrderingVal);
1916
1917 // 'failure_order' argument, if present.
1918 if (Ordering2Val)
1919 Args.push_back(Ordering2Val);
1920
1921 // Now, the return type.
1922 if (CASExpected) {
1923 ResultTy = Type::getInt1Ty(Ctx);
1924 Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
1925 } else if (HasResult && UseSizedLibcall)
1926 ResultTy = SizedIntTy;
1927 else
1928 ResultTy = Type::getVoidTy(Ctx);
1929
1930 // Done with setting up arguments and return types, create the call:
1932 for (Value *Arg : Args)
1933 ArgTys.push_back(Arg->getType());
1934 FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
1935 FunctionCallee LibcallFn =
1936 M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
1937 CallInst *Call = Builder.CreateCall(LibcallFn, Args);
1938 Call->setAttributes(Attr);
1939 Value *Result = Call;
1940
1941 // And then, extract the results...
1942 if (ValueOperand && !UseSizedLibcall)
1943 Builder.CreateLifetimeEnd(AllocaValue_i8, SizeVal64);
1944
1945 if (CASExpected) {
1946 // The final result from the CAS is {load of 'expected' alloca, bool result
1947 // from call}
1948 Type *FinalResultTy = I->getType();
1949 Value *V = PoisonValue::get(FinalResultTy);
1950 Value *ExpectedOut = Builder.CreateAlignedLoad(
1951 CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
1952 Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64);
1953 V = Builder.CreateInsertValue(V, ExpectedOut, 0);
1954 V = Builder.CreateInsertValue(V, Result, 1);
1955 I->replaceAllUsesWith(V);
1956 } else if (HasResult) {
1957 Value *V;
1958 if (UseSizedLibcall)
1959 V = Builder.CreateBitOrPointerCast(Result, I->getType());
1960 else {
1961 V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
1962 AllocaAlignment);
1963 Builder.CreateLifetimeEnd(AllocaResult_i8, SizeVal64);
1964 }
1965 I->replaceAllUsesWith(V);
1966 }
1967 I->eraseFromParent();
1968 return true;
1969}
#define Success
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
assume Assume Builder
static Value * performMaskedAtomicOp(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, Value *Loaded, Value *Shifted_Inc, Value *Inc, const PartwordMaskValues &PMV)
Emit IR to implement a masked version of a given atomicrmw operation.
static unsigned getAtomicOpSize(LoadInst *LI)
static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder, Instruction *I, Type *ValueType, Value *Addr, Align AddrAlign, unsigned MinWordSize)
This is a helper function which builds instructions to provide values necessary for partword atomic o...
static bool canUseSizedAtomicCall(unsigned Size, Align Alignment, const DataLayout &DL)
static Value * extractMaskedValue(IRBuilderBase &Builder, Value *WideWord, const PartwordMaskValues &PMV)
#define DEBUG_TYPE
static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I)
static Value * insertMaskedValue(IRBuilderBase &Builder, Value *WideWord, Value *Updated, const PartwordMaskValues &PMV)
static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr, Value *Loaded, Value *NewVal, Align AddrAlign, AtomicOrdering MemOpOrder, SyncScope::ID SSID, Value *&Success, Value *&NewLoaded)
static ArrayRef< RTLIB::Libcall > GetRMWLibcall(AtomicRMWInst::BinOp Op)
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:172
This file contains the declarations for the subclasses of Constant, which represent the different fla...
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
uint64_t Size
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
Module.h This file contains the declarations for the Module class.
print must be executed print the must be executed context for all instructions
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
@ SI
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallVector class.
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
an instruction to allocate memory on the stack
Definition: Instructions.h:58
PointerType * getType() const
Overload to return most specific pointer type.
Definition: Instructions.h:100
void setAlignment(Align Align)
Definition: Instructions.h:129
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:158
An instruction that atomically checks whether a specified value is in a memory location,...
Definition: Instructions.h:513
AtomicOrdering getMergedOrdering() const
Returns a single ordering which is at least as strong as both the success and failure orderings for t...
Definition: Instructions.h:621
void setWeak(bool IsWeak)
Definition: Instructions.h:578
bool isVolatile() const
Return true if this is a cmpxchg from a volatile memory location.
Definition: Instructions.h:569
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
Definition: Instructions.h:608
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
Definition: Instructions.h:666
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:558
bool isWeak() const
Return true if this cmpxchg may spuriously fail.
Definition: Instructions.h:576
void setVolatile(bool V)
Specify whether this is a volatile cmpxchg.
Definition: Instructions.h:573
AtomicOrdering getSuccessOrdering() const
Returns the success ordering constraint of this cmpxchg instruction.
Definition: Instructions.h:596
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this cmpxchg instruction.
Definition: Instructions.h:634
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:718
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
Definition: Instructions.h:834
bool isVolatile() const
Return true if this is a RMW on a volatile memory location.
Definition: Instructions.h:844
BinOp
This enumeration lists the possible modifications atomicrmw can make.
Definition: Instructions.h:730
@ Add
*p = old + v
Definition: Instructions.h:734
@ FAdd
*p = old + v
Definition: Instructions.h:755
@ Min
*p = old <signed v ? old : v
Definition: Instructions.h:748
@ Or
*p = old | v
Definition: Instructions.h:742
@ Sub
*p = old - v
Definition: Instructions.h:736
@ And
*p = old & v
Definition: Instructions.h:738
@ Xor
*p = old ^ v
Definition: Instructions.h:744
@ FSub
*p = old - v
Definition: Instructions.h:758
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:770
@ Max
*p = old >signed v ? old : v
Definition: Instructions.h:746
@ UMin
*p = old <unsigned v ? old : v
Definition: Instructions.h:752
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
Definition: Instructions.h:766
@ UMax
*p = old >unsigned v ? old : v
Definition: Instructions.h:750
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
Definition: Instructions.h:762
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:774
@ Nand
*p = ~(old & v)
Definition: Instructions.h:740
Value * getPointerOperand()
Definition: Instructions.h:877
BinOp getOperation() const
Definition: Instructions.h:812
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
Definition: Instructions.h:868
Value * getValOperand()
Definition: Instructions.h:881
static StringRef getOperationName(BinOp Op)
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
Definition: Instructions.h:854
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:885
AttributeList addRetAttribute(LLVMContext &C, Attribute::AttrKind Kind) const
Add a return value attribute to the list.
Definition: Attributes.h:548
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
iterator end()
Definition: BasicBlock.h:316
iterator begin()
Instruction iterator methods.
Definition: BasicBlock.h:314
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition: BasicBlock.h:105
BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="", bool Before=false)
Split the basic block into two basic blocks at the specified instruction.
Definition: BasicBlock.cpp:401
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:112
This class represents a function call, abstracting a target machine's calling convention.
This is the shared class of boolean and integer constants.
Definition: Constants.h:78
static ConstantInt * getTrue(LLVMContext &Context)
Definition: Constants.cpp:835
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:887
static ConstantInt * getFalse(LLVMContext &Context)
Definition: Constants.cpp:842
This is an important base class in LLVM.
Definition: Constant.h:41
static Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
Definition: Constants.cpp:356
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:114
This instruction extracts a struct member or array element value from an aggregate value.
ArrayRef< unsigned > getIndices() const
unsigned getNumIndices() const
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:165
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overriden by subclasses to do the per-function processing of the pass.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
void CollectMetadataToCopy(Instruction *Src, ArrayRef< unsigned > MetadataKinds)
Collect metadata with IDs MetadataKinds from Src which should be added to all created instructions.
Definition: IRBuilder.h:219
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition: IRBuilder.h:180
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition: IRBuilder.h:2550
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:70
const BasicBlock * getParent() const
Definition: Instruction.h:90
const Function * getFunction() const
Return the function this instruction belongs to.
Definition: Instruction.cpp:74
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
Definition: Instruction.cpp:82
Class to represent integer types.
Definition: DerivedTypes.h:40
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:325
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void getSyncScopeNames(SmallVectorImpl< StringRef > &SSNs) const
getSyncScopeNames - Populates client supplied SmallVector with synchronization scope names registered...
An instruction for reading from memory.
Definition: Instructions.h:177
Value * getPointerOperand()
Definition: Instructions.h:264
bool isVolatile() const
Return true if this is a load from a volatile memory location.
Definition: Instructions.h:214
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
Definition: Instructions.h:250
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
Definition: Instructions.h:229
void setVolatile(bool V)
Specify whether this is a volatile load or not.
Definition: Instructions.h:217
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Definition: Instructions.h:239
Align getAlign() const
Return the alignment of the access that is being performed.
Definition: Instructions.h:220
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
LLVMContext & getContext() const
Get the global data context.
Definition: Module.h:262
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
Definition: Module.cpp:398
The optimization diagnostic interface.
Diagnostic information for applied optimization remarks.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
static PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
Definition: Constants.cpp:1759
bool empty() const
Definition: SmallVector.h:94
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
An instruction for storing to memory.
Definition: Instructions.h:301
void setVolatile(bool V)
Specify whether this is a volatile store or not.
Definition: Instructions.h:340
void setAlignment(Align Align)
Definition: Instructions.h:349
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
Definition: Instructions.h:376
unsigned getMaxAtomicSizeInBitsSupported() const
Returns the maximum atomic operation size (in bits) supported by the backend.
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:78
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
PointerType * getPointerTo(unsigned AddrSpace=0) const
Return a pointer to the current type.
bool isPointerTy() const
True if this is an instance of PointerType.
Definition: Type.h:249
static IntegerType * getInt1Ty(LLVMContext &C)
unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
static Type * getVoidTy(LLVMContext &C)
static PointerType * getInt8PtrTy(LLVMContext &C, unsigned AS=0)
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition: Type.h:185
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
LLVM Value Representation.
Definition: Value.h:74
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
iterator_range< user_iterator > users()
Definition: Value.h:421
bool use_empty() const
Definition: Value.h:344
An efficient, type-erasing, non-owning reference to a callable.
self_iterator getIterator()
Definition: ilist_node.h:82
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
Libcall
RTLIB::Libcall enum - This enum defines all of the runtime library calls the backend can emit.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
FunctionPass * createAtomicExpandPass()
AtomicExpandPass - At IR level this pass replace atomic instructions with __atomic_* library calls,...
bool isReleaseOrStronger(AtomicOrdering AO)
AtomicOrderingCABI toCABI(AtomicOrdering AO)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
void initializeAtomicExpandPass(PassRegistry &)
Value * buildAtomicRMWValue(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, Value *Loaded, Value *Val)
Emit IR to implement the given atomicrmw operation on values in registers, returning the new value.
Definition: LowerAtomic.cpp:42
AtomicOrdering
Atomic ordering for LLVM's memory model.
bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg)
Expand an atomic RMW instruction into a loop utilizing cmpxchg.
@ Or
Bitwise or logical OR of integers.
@ And
Bitwise or logical AND of integers.
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:292
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
bool lowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI)
Convert the given Cmpxchg into primitive load and compare.
Definition: LowerAtomic.cpp:23
bool lowerAtomicRMWInst(AtomicRMWInst *RMWI)
Convert the given RMWI into primitive load and stores, assuming that doing so is legal.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
char & AtomicExpandID
AtomicExpandID – Lowers atomic operations in terms of either cmpxchg load-linked/store-conditional lo...
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:34
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:340
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition: ValueTypes.h:379