1//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass (at IR level) to replace atomic instructions with
10// __atomic_* library calls, or target specific instructions which implement the
11// same semantics in a way which better fits the target backend. This can
12// include the use of (intrinsic-based) load-linked/store-conditional loops,
13// AtomicCmpXchg, or type coercions.
14//
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/ArrayRef.h"
28#include "llvm/IR/Attributes.h"
29#include "llvm/IR/BasicBlock.h"
30#include "llvm/IR/Constant.h"
31#include "llvm/IR/Constants.h"
32#include "llvm/IR/DataLayout.h"
34#include "llvm/IR/Function.h"
35#include "llvm/IR/IRBuilder.h"
36#include "llvm/IR/Instruction.h"
38#include "llvm/IR/MDBuilder.h"
40#include "llvm/IR/Module.h"
42#include "llvm/IR/Type.h"
43#include "llvm/IR/User.h"
44#include "llvm/IR/Value.h"
46#include "llvm/Pass.h"
49#include "llvm/Support/Debug.h"
54#include <cassert>
55#include <cstdint>
56#include <iterator>
57
58using namespace llvm;
59
60#define DEBUG_TYPE "atomic-expand"
61
62namespace {
63
64class AtomicExpandImpl {
65 const TargetLowering *TLI = nullptr;
66 const LibcallLoweringInfo *LibcallLowering = nullptr;
67 const DataLayout *DL = nullptr;
68
69private:
70 void handleFailure(Instruction &FailedInst, const Twine &Msg) const {
71 LLVMContext &Ctx = FailedInst.getContext();
72
73 // TODO: Do not use generic error type.
74 Ctx.emitError(&FailedInst, Msg);
75
76 if (!FailedInst.getType()->isVoidTy())
77 FailedInst.replaceAllUsesWith(PoisonValue::get(FailedInst.getType()));
78 FailedInst.eraseFromParent();
79 }
80
81 bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
82 IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
83 LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
84 bool tryExpandAtomicLoad(LoadInst *LI);
85 bool expandAtomicLoadToLL(LoadInst *LI);
86 bool expandAtomicLoadToCmpXchg(LoadInst *LI);
87 StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
88 bool tryExpandAtomicStore(StoreInst *SI);
89 void expandAtomicStoreToXChg(StoreInst *SI);
90 bool tryExpandAtomicRMW(AtomicRMWInst *AI);
91 AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
92 Value *
93 insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
94 Align AddrAlign, AtomicOrdering MemOpOrder,
95 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
96 void expandAtomicOpToLLSC(
97 Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
98 AtomicOrdering MemOpOrder,
99 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
100 void expandPartwordAtomicRMW(
101 AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind);
102 AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
103 bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
104 void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
105 void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
106
107 AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
108 static Value *insertRMWCmpXchgLoop(
109 IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
110 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
111 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
112 CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc);
113 bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
114
115 bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
116 bool isIdempotentRMW(AtomicRMWInst *RMWI);
117 bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
118
119 bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
120 Value *PointerOperand, Value *ValueOperand,
121 Value *CASExpected, AtomicOrdering Ordering,
122 AtomicOrdering Ordering2,
123 ArrayRef<RTLIB::Libcall> Libcalls);
124 void expandAtomicLoadToLibcall(LoadInst *LI);
125 void expandAtomicStoreToLibcall(StoreInst *LI);
126 void expandAtomicRMWToLibcall(AtomicRMWInst *I);
127 void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
128
129 friend bool
130 llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
131 CreateCmpXchgInstFun CreateCmpXchg);
132
133 bool processAtomicInstr(Instruction *I);
134
135public:
136 bool run(Function &F,
137 const LibcallLoweringModuleAnalysisResult &LibcallResult,
138 const TargetMachine *TM);
139};
140
141class AtomicExpandLegacy : public FunctionPass {
142public:
143 static char ID; // Pass identification, replacement for typeid
144
145 AtomicExpandLegacy() : FunctionPass(ID) {
146 initializeAtomicExpandLegacyPass(*PassRegistry::getPassRegistry());
147 }
148
149 void getAnalysisUsage(AnalysisUsage &AU) const override {
150 AU.addRequired<LibcallLoweringInfoWrapper>();
152 }
153
154 bool runOnFunction(Function &F) override;
155};
156
157// IRBuilder to be used for replacement atomic instructions.
158struct ReplacementIRBuilder
159 : IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> {
160 MDNode *MMRAMD = nullptr;
161
162 // Preserves the DebugLoc from I, and preserves still valid metadata.
163 // Enable StrictFP builder mode when appropriate.
164 explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
165 : IRBuilder(I->getContext(), InstSimplifyFolder(DL),
166 IRBuilderCallbackInserter(
167 [this](Instruction *I) { addMMRAMD(I); })) {
168 SetInsertPoint(I);
169 this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
170 if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
171 this->setIsFPConstrained(true);
172
173 MMRAMD = I->getMetadata(LLVMContext::MD_mmra);
174 }
175
176 void addMMRAMD(Instruction *I) {
177 if (canInstructionHaveMMRAs(*I))
178 I->setMetadata(LLVMContext::MD_mmra, MMRAMD);
179 }
180};
181
182} // end anonymous namespace
183
184char AtomicExpandLegacy::ID = 0;
185
186char &llvm::AtomicExpandID = AtomicExpandLegacy::ID;
187
188INITIALIZE_PASS_BEGIN(AtomicExpandLegacy, DEBUG_TYPE,
189 "Expand Atomic instructions", false, false)
192INITIALIZE_PASS_END(AtomicExpandLegacy, DEBUG_TYPE,
193 "Expand Atomic instructions", false, false)
194
195// Helper functions to retrieve the size of atomic instructions.
196static unsigned getAtomicOpSize(LoadInst *LI) {
197 const DataLayout &DL = LI->getDataLayout();
198 return DL.getTypeStoreSize(LI->getType());
199}
200
201static unsigned getAtomicOpSize(StoreInst *SI) {
202 const DataLayout &DL = SI->getDataLayout();
203 return DL.getTypeStoreSize(SI->getValueOperand()->getType());
204}
205
206static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
207 const DataLayout &DL = RMWI->getDataLayout();
208 return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
209}
210
211static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
212 const DataLayout &DL = CASI->getDataLayout();
213 return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
214}
215
216/// Copy metadata that's safe to preserve when widening atomics.
217static void copyMetadataForAtomic(Instruction &Dest,
218 const Instruction &Source) {
219 SmallVector<std::pair<unsigned, MDNode *>> MD;
220 Source.getAllMetadata(MD);
221 LLVMContext &Ctx = Dest.getContext();
222 MDBuilder MDB(Ctx);
223
224 for (auto [ID, N] : MD) {
225 switch (ID) {
226 case LLVMContext::MD_dbg:
227 case LLVMContext::MD_tbaa:
228 case LLVMContext::MD_tbaa_struct:
229 case LLVMContext::MD_alias_scope:
230 case LLVMContext::MD_noalias:
231 case LLVMContext::MD_noalias_addrspace:
232 case LLVMContext::MD_access_group:
233 case LLVMContext::MD_mmra:
234 Dest.setMetadata(ID, N);
235 break;
236 default:
237 if (ID == Ctx.getMDKindID("amdgpu.no.remote.memory"))
238 Dest.setMetadata(ID, N);
239 else if (ID == Ctx.getMDKindID("amdgpu.no.fine.grained.memory"))
240 Dest.setMetadata(ID, N);
241
242 // Losing amdgpu.ignore.denormal.mode, but it doesn't matter for current
243 // uses.
244 break;
245 }
246 }
247}
248
249// Determine if a particular atomic operation has a supported size,
250// and is of appropriate alignment, to be passed through for target
251// lowering. (Versus turning into a __atomic libcall)
252template <typename Inst>
253static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
254 unsigned Size = getAtomicOpSize(I);
255 Align Alignment = I->getAlign();
256 return Alignment >= Size &&
257 Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
258}
259
260bool AtomicExpandImpl::processAtomicInstr(Instruction *I) {
261 auto *LI = dyn_cast<LoadInst>(I);
262 auto *SI = dyn_cast<StoreInst>(I);
263 auto *RMWI = dyn_cast<AtomicRMWInst>(I);
264 auto *CASI = dyn_cast<AtomicCmpXchgInst>(I);
265
266 bool MadeChange = false;
267
268 // If the Size/Alignment is not supported, replace with a libcall.
269 if (LI) {
270 if (!LI->isAtomic())
271 return false;
272
273 if (!atomicSizeSupported(TLI, LI)) {
274 expandAtomicLoadToLibcall(LI);
275 return true;
276 }
277
278 if (TLI->shouldCastAtomicLoadInIR(LI) ==
279 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
280 I = LI = convertAtomicLoadToIntegerType(LI);
281 MadeChange = true;
282 }
283 } else if (SI) {
284 if (!SI->isAtomic())
285 return false;
286
287 if (!atomicSizeSupported(TLI, SI)) {
288 expandAtomicStoreToLibcall(SI);
289 return true;
290 }
291
292 if (TLI->shouldCastAtomicStoreInIR(SI) ==
293 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
294 I = SI = convertAtomicStoreToIntegerType(SI);
295 MadeChange = true;
296 }
297 } else if (RMWI) {
298 if (!atomicSizeSupported(TLI, RMWI)) {
299 expandAtomicRMWToLibcall(RMWI);
300 return true;
301 }
302
303 if (TLI->shouldCastAtomicRMWIInIR(RMWI) ==
304 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
305 I = RMWI = convertAtomicXchgToIntegerType(RMWI);
306 MadeChange = true;
307 }
308 } else if (CASI) {
309 if (!atomicSizeSupported(TLI, CASI)) {
310 expandAtomicCASToLibcall(CASI);
311 return true;
312 }
313
314 // TODO: when we're ready to make the change at the IR level, we can
315 // extend convertCmpXchgToInteger for floating point too.
316 if (CASI->getCompareOperand()->getType()->isPointerTy()) {
317 // TODO: add a TLI hook to control this so that each target can
318 // convert to lowering the original type one at a time.
319 I = CASI = convertCmpXchgToIntegerType(CASI);
320 MadeChange = true;
321 }
322 } else
323 return false;
324
325 if (TLI->shouldInsertFencesForAtomic(I)) {
326 auto FenceOrdering = AtomicOrdering::Monotonic;
327 if (LI && isAcquireOrStronger(LI->getOrdering())) {
328 FenceOrdering = LI->getOrdering();
329 LI->setOrdering(AtomicOrdering::Monotonic);
330 } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
331 FenceOrdering = SI->getOrdering();
332 SI->setOrdering(AtomicOrdering::Monotonic);
333 } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
334 isAcquireOrStronger(RMWI->getOrdering()))) {
335 FenceOrdering = RMWI->getOrdering();
336 RMWI->setOrdering(AtomicOrdering::Monotonic);
337 } else if (CASI &&
338 TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
339 TargetLoweringBase::AtomicExpansionKind::None &&
340 (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
341 isAcquireOrStronger(CASI->getSuccessOrdering()) ||
342 isAcquireOrStronger(CASI->getFailureOrdering()))) {
343 // If a compare and swap is lowered to LL/SC, we can do smarter fence
344 // insertion, with a stronger one on the success path than on the
345 // failure path. As a result, fence insertion is directly done by
346 // expandAtomicCmpXchg in that case.
347 FenceOrdering = CASI->getMergedOrdering();
348 auto CASOrdering = TLI->atomicOperationOrderAfterFenceSplit(CASI);
349
350 CASI->setSuccessOrdering(CASOrdering);
351 CASI->setFailureOrdering(CASOrdering);
352 }
353
354 if (FenceOrdering != AtomicOrdering::Monotonic) {
355 MadeChange |= bracketInstWithFences(I, FenceOrdering);
356 }
357 } else if (TLI->shouldInsertTrailingFenceForAtomicStore(I) &&
358 !(CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
359 TargetLoweringBase::AtomicExpansionKind::LLSC)) {
360 // CmpXchg LLSC is handled in expandAtomicCmpXchg().
361 IRBuilder Builder(I);
362 if (auto TrailingFence = TLI->emitTrailingFence(
363 Builder, I, AtomicOrdering::SequentiallyConsistent)) {
364 TrailingFence->moveAfter(I);
365 MadeChange = true;
366 }
367 }
368
369 if (LI)
370 MadeChange |= tryExpandAtomicLoad(LI);
371 else if (SI)
372 MadeChange |= tryExpandAtomicStore(SI);
373 else if (RMWI) {
374 // There are two different ways of expanding RMW instructions:
375 // - into a load if it is idempotent
376 // - into a Cmpxchg/LL-SC loop otherwise
377 // we try them in that order.
378
379 if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
380 MadeChange = true;
381
382 } else {
383 MadeChange |= tryExpandAtomicRMW(RMWI);
384 }
385 } else if (CASI)
386 MadeChange |= tryExpandAtomicCmpXchg(CASI);
387
388 return MadeChange;
389}
390
391bool AtomicExpandImpl::run(
392 Function &F, const LibcallLoweringModuleAnalysisResult &LibcallResult,
393 const TargetMachine *TM) {
394 const auto *Subtarget = TM->getSubtargetImpl(F);
395 if (!Subtarget->enableAtomicExpand())
396 return false;
397 TLI = Subtarget->getTargetLowering();
398 LibcallLowering = &LibcallResult.getLibcallLowering(*Subtarget);
399 DL = &F.getDataLayout();
400
401 bool MadeChange = false;
402
403 for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
404 BasicBlock *BB = &*BBI;
405
406 BasicBlock::reverse_iterator Next;
407
408 for (BasicBlock::reverse_iterator I = BB->rbegin(), E = BB->rend(); I != E;
409 I = Next) {
410 Instruction &Inst = *I;
411 Next = std::next(I);
412
413 if (processAtomicInstr(&Inst)) {
414 MadeChange = true;
415
416 // New blocks may have been inserted.
417 BBE = F.end();
418 }
419 }
420 }
421
422 return MadeChange;
423}
424
425bool AtomicExpandLegacy::runOnFunction(Function &F) {
426
427 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
428 if (!TPC)
429 return false;
430 auto *TM = &TPC->getTM<TargetMachine>();
431
432 const LibcallLoweringModuleAnalysisResult &LibcallResult =
433 getAnalysis<LibcallLoweringInfoWrapper>().getResult(*F.getParent());
434 AtomicExpandImpl AE;
435 return AE.run(F, LibcallResult, TM);
436}
437
438FunctionPass *llvm::createAtomicExpandLegacyPass() {
439 return new AtomicExpandLegacy();
440}
441
442PreservedAnalyses AtomicExpandPass::run(Function &F,
443 FunctionAnalysisManager &FAM) {
444 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
445
446 const LibcallLoweringModuleAnalysisResult *LibcallResult =
447 MAMProxy.getCachedResult<LibcallLoweringModuleAnalysis>(*F.getParent());
448
449 if (!LibcallResult) {
450 F.getContext().emitError("'" + LibcallLoweringModuleAnalysis::name() +
451 "' analysis required");
452 return PreservedAnalyses::all();
453 }
454
455 AtomicExpandImpl AE;
456
457 bool Changed = AE.run(F, *LibcallResult, TM);
458 if (!Changed)
459 return PreservedAnalyses::all();
460
461 return PreservedAnalyses::none();
462}
463
464bool AtomicExpandImpl::bracketInstWithFences(Instruction *I,
465 AtomicOrdering Order) {
466 ReplacementIRBuilder Builder(I, *DL);
467
468 auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
469
470 auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
471 // We have a guard here because not every atomic operation generates a
472 // trailing fence.
473 if (TrailingFence)
474 TrailingFence->moveAfter(I);
475
476 return (LeadingFence || TrailingFence);
477}
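// For illustration only (a sketch, assuming a target whose emitLeadingFence /
// emitTrailingFence hooks emit plain IR fences): a seq_cst atomic store that
// was downgraded to monotonic by processAtomicInstr ends up roughly as
//   fence seq_cst
//   store atomic i32 %v, ptr %p monotonic, align 4
//   fence seq_cst
// Whether a leading fence, a trailing fence, or neither is emitted is entirely
// target-defined.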
478
479/// Get the iX type with the same bitwidth as T.
480IntegerType *
481AtomicExpandImpl::getCorrespondingIntegerType(Type *T, const DataLayout &DL) {
482 EVT VT = TLI->getMemValueType(DL, T);
483 unsigned BitWidth = VT.getStoreSizeInBits();
484 assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
485 return IntegerType::get(T->getContext(), BitWidth);
486}
487
488/// Convert an atomic load of a non-integral type to an integer load of the
489/// equivalent bitwidth. See the function comment on
490/// convertAtomicStoreToIntegerType for background.
491LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) {
492 auto *M = LI->getModule();
493 Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());
494
495 ReplacementIRBuilder Builder(LI, *DL);
496
497 Value *Addr = LI->getPointerOperand();
498
499 auto *NewLI = Builder.CreateLoad(NewTy, Addr);
500 NewLI->setAlignment(LI->getAlign());
501 NewLI->setVolatile(LI->isVolatile());
502 NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
503 LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
504
505 Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
506 LI->replaceAllUsesWith(NewVal);
507 LI->eraseFromParent();
508 return NewLI;
509}
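// For illustration, the conversion above turns, e.g.,
//   %v = load atomic float, ptr %p seq_cst, align 4
// into
//   %v.int = load atomic i32, ptr %p seq_cst, align 4
//   %v     = bitcast i32 %v.int to float
// (value names here are illustrative only).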
510
511AtomicRMWInst *
512AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
514
515 auto *M = RMWI->getModule();
516 Type *NewTy =
517 getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());
518
519 ReplacementIRBuilder Builder(RMWI, *DL);
520
521 Value *Addr = RMWI->getPointerOperand();
522 Value *Val = RMWI->getValOperand();
523 Value *NewVal = Val->getType()->isPointerTy()
524 ? Builder.CreatePtrToInt(Val, NewTy)
525 : Builder.CreateBitCast(Val, NewTy);
526
527 auto *NewRMWI = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Addr, NewVal,
528 RMWI->getAlign(), RMWI->getOrdering(),
529 RMWI->getSyncScopeID());
530 NewRMWI->setVolatile(RMWI->isVolatile());
531 copyMetadataForAtomic(*NewRMWI, *RMWI);
532 LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
533
534 Value *NewRVal = RMWI->getType()->isPointerTy()
535 ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
536 : Builder.CreateBitCast(NewRMWI, RMWI->getType());
537 RMWI->replaceAllUsesWith(NewRVal);
538 RMWI->eraseFromParent();
539 return NewRMWI;
540}
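// For illustration, an xchg of a non-integer payload such as
//   %old = atomicrmw xchg ptr %p, float %v seq_cst, align 4
// becomes roughly
//   %v.int   = bitcast float %v to i32
//   %old.int = atomicrmw xchg ptr %p, i32 %v.int seq_cst, align 4
//   %old     = bitcast i32 %old.int to float
// with ptrtoint/inttoptr used instead of bitcast for pointer payloads.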
541
542bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
543 switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
544 case TargetLoweringBase::AtomicExpansionKind::None:
545 return false;
546 case TargetLoweringBase::AtomicExpansionKind::LLSC:
547 expandAtomicOpToLLSC(
548 LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
549 LI->getOrdering(),
550 [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
551 return true;
552 case TargetLoweringBase::AtomicExpansionKind::LLOnly:
553 return expandAtomicLoadToLL(LI);
554 case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
555 return expandAtomicLoadToCmpXchg(LI);
556 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
557 LI->setAtomic(AtomicOrdering::NotAtomic);
558 return true;
559 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
560 TLI->emitExpandAtomicLoad(LI);
561 return true;
562 default:
563 llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
564 }
565}
566
567bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
568 switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
569 case TargetLoweringBase::AtomicExpansionKind::None:
570 return false;
571 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
572 TLI->emitExpandAtomicStore(SI);
573 return true;
574 case TargetLoweringBase::AtomicExpansionKind::Expand:
575 expandAtomicStoreToXChg(SI);
576 return true;
577 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
578 SI->setAtomic(AtomicOrdering::NotAtomic);
579 return true;
580 default:
581 llvm_unreachable("Unhandled case in tryExpandAtomicStore");
582 }
583}
584
585bool AtomicExpandImpl::expandAtomicLoadToLL(LoadInst *LI) {
586 ReplacementIRBuilder Builder(LI, *DL);
587
588 // On some architectures, load-linked instructions are atomic for larger
589 // sizes than normal loads. For example, the only 64-bit load guaranteed
590 // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
591 Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
592 LI->getPointerOperand(), LI->getOrdering());
593 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
594
595 LI->replaceAllUsesWith(Val);
596 LI->eraseFromParent();
597
598 return true;
599}
600
601bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
602 ReplacementIRBuilder Builder(LI, *DL);
603 AtomicOrdering Order = LI->getOrdering();
604 if (Order == AtomicOrdering::Unordered)
605 Order = AtomicOrdering::Monotonic;
606
607 Value *Addr = LI->getPointerOperand();
608 Type *Ty = LI->getType();
609 Constant *DummyVal = Constant::getNullValue(Ty);
610
611 Value *Pair = Builder.CreateAtomicCmpXchg(
612 Addr, DummyVal, DummyVal, LI->getAlign(), Order,
613 AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
614 Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
615
616 LI->replaceAllUsesWith(Loaded);
617 LI->eraseFromParent();
618
619 return true;
620}
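// For illustration, on a target that requests the CmpXChg expansion,
//   %v = load atomic i64, ptr %p acquire, align 8
// becomes roughly
//   %pair = cmpxchg ptr %p, i64 0, i64 0 acquire acquire, align 8
//   %v    = extractvalue { i64, i1 } %pair, 0
// i.e. a compare-exchange of 0 against 0 whose loaded result is the value.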
621
622/// Convert an atomic store of a non-integral type to an integer store of the
623/// equivalent bitwidth. We used to not support floating point or vector
624/// atomics in the IR at all. The backends learned to deal with the bitcast
625/// idiom because that was the only way of expressing the notion of an atomic
626/// float or vector store. The long term plan is to teach each backend to
627/// instruction select from the original atomic store, but as a migration
628/// mechanism, we convert back to the old format which the backends understand.
629/// Each backend will need individual work to recognize the new format.
630StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
631 ReplacementIRBuilder Builder(SI, *DL);
632 auto *M = SI->getModule();
633 Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
634 M->getDataLayout());
635 Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
636
637 Value *Addr = SI->getPointerOperand();
638
639 StoreInst *NewSI = Builder.CreateStore(NewVal, Addr);
640 NewSI->setAlignment(SI->getAlign());
641 NewSI->setVolatile(SI->isVolatile());
642 NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
643 LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
644 SI->eraseFromParent();
645 return NewSI;
646}
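// For illustration, the conversion above turns, e.g.,
//   store atomic double %v, ptr %p release, align 8
// into
//   %v.int = bitcast double %v to i64
//   store atomic i64 %v.int, ptr %p release, align 8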
647
648void AtomicExpandImpl::expandAtomicStoreToXChg(StoreInst *SI) {
649 // This function is only called on atomic stores that are too large to be
650 // atomic if implemented as a native store. So we replace them with an
651 // atomic swap, which can be implemented for example as ldrex/strex on ARM
652 // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
653 // It is the responsibility of the target to only signal expansion via
654 // shouldExpandAtomicRMW in cases where this is required and possible.
655 ReplacementIRBuilder Builder(SI, *DL);
656 AtomicOrdering Ordering = SI->getOrdering();
657 assert(Ordering != AtomicOrdering::NotAtomic);
658 AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
659 ? AtomicOrdering::Monotonic
660 : Ordering;
661 AtomicRMWInst *AI = Builder.CreateAtomicRMW(
662 AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
663 SI->getAlign(), RMWOrdering);
664 SI->eraseFromParent();
665
666 // Now we have an appropriate swap instruction, lower it as usual.
667 tryExpandAtomicRMW(AI);
668}
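// For illustration, an oversized
//   store atomic i64 %v, ptr %p seq_cst, align 8
// becomes (result unused)
//   atomicrmw xchg ptr %p, i64 %v seq_cst, align 8
// which tryExpandAtomicRMW then lowers further (e.g. to an LL/SC loop).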
669
670static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
671 Value *Loaded, Value *NewVal, Align AddrAlign,
672 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
673 Value *&Success, Value *&NewLoaded,
674 Instruction *MetadataSrc) {
675 Type *OrigTy = NewVal->getType();
676
677 // This code can go away when cmpxchg supports FP and vector types.
678 assert(!OrigTy->isPointerTy());
679 bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isVectorTy();
680 if (NeedBitcast) {
681 IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
682 NewVal = Builder.CreateBitCast(NewVal, IntTy);
683 Loaded = Builder.CreateBitCast(Loaded, IntTy);
684 }
685
686 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
687 Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
688 AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
689 if (MetadataSrc)
690 copyMetadataForAtomic(*Pair, *MetadataSrc);
691
692 Success = Builder.CreateExtractValue(Pair, 1, "success");
693 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
694
695 if (NeedBitcast)
696 NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
697}
698
699bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
700 LLVMContext &Ctx = AI->getModule()->getContext();
701 TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
702 switch (Kind) {
703 case TargetLoweringBase::AtomicExpansionKind::None:
704 return false;
705 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
706 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
707 unsigned ValueSize = getAtomicOpSize(AI);
708 if (ValueSize < MinCASSize) {
709 expandPartwordAtomicRMW(AI,
710 TargetLoweringBase::AtomicExpansionKind::LLSC);
711 } else {
712 auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
713 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
714 AI->getValOperand());
715 };
716 expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
717 AI->getAlign(), AI->getOrdering(), PerformOp);
718 }
719 return true;
720 }
721 case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
722 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
723 unsigned ValueSize = getAtomicOpSize(AI);
724 if (ValueSize < MinCASSize) {
725 expandPartwordAtomicRMW(AI,
726 TargetLoweringBase::AtomicExpansionKind::CmpXChg);
727 } else {
728 SmallVector<StringRef> SSNs;
729 Ctx.getSyncScopeNames(SSNs);
730 auto MemScope = SSNs[AI->getSyncScopeID()].empty()
731 ? "system"
732 : SSNs[AI->getSyncScopeID()];
733 OptimizationRemarkEmitter ORE(AI->getFunction());
734 ORE.emit([&]() {
735 return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
736 << "A compare and swap loop was generated for an atomic "
737 << AI->getOperationName(AI->getOperation()) << " operation at "
738 << MemScope << " memory scope";
739 });
740 expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
741 }
742 return true;
743 }
744 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
745 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
746 unsigned ValueSize = getAtomicOpSize(AI);
747 if (ValueSize < MinCASSize) {
748 AtomicRMWInst::BinOp Op = AI->getOperation();
749 // Widen And/Or/Xor and give the target another chance at expanding it.
750 if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
751 Op == AtomicRMWInst::And) {
752 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
753 return true;
754 }
755 }
756 expandAtomicRMWToMaskedIntrinsic(AI);
757 return true;
758 }
759 case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
760 TLI->emitBitTestAtomicRMWIntrinsic(AI);
761 return true;
762 }
763 case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: {
764 TLI->emitCmpArithAtomicRMWIntrinsic(AI);
765 return true;
766 }
767 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
768 return lowerAtomicRMWInst(AI);
769 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
770 TLI->emitExpandAtomicRMW(AI);
771 return true;
772 default:
773 llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
774 }
775}
776
777namespace {
778
779struct PartwordMaskValues {
780 // These three fields are guaranteed to be set by createMaskInstrs.
781 Type *WordType = nullptr;
782 Type *ValueType = nullptr;
783 Type *IntValueType = nullptr;
784 Value *AlignedAddr = nullptr;
785 Align AlignedAddrAlignment;
786 // The remaining fields can be null.
787 Value *ShiftAmt = nullptr;
788 Value *Mask = nullptr;
789 Value *Inv_Mask = nullptr;
790};
791
792[[maybe_unused]]
793raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
794 auto PrintObj = [&O](auto *V) {
795 if (V)
796 O << *V;
797 else
798 O << "nullptr";
799 O << '\n';
800 };
801 O << "PartwordMaskValues {\n";
802 O << " WordType: ";
803 PrintObj(PMV.WordType);
804 O << " ValueType: ";
805 PrintObj(PMV.ValueType);
806 O << " AlignedAddr: ";
807 PrintObj(PMV.AlignedAddr);
808 O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
809 O << " ShiftAmt: ";
810 PrintObj(PMV.ShiftAmt);
811 O << " Mask: ";
812 PrintObj(PMV.Mask);
813 O << " Inv_Mask: ";
814 PrintObj(PMV.Inv_Mask);
815 O << "}\n";
816 return O;
817}
818
819} // end anonymous namespace
820
821/// This is a helper function which builds instructions to provide
822/// values necessary for partword atomic operations. It takes an
823/// incoming address, Addr, and ValueType, and constructs the address,
824/// shift-amounts and masks needed to work with a larger value of size
825/// WordSize.
826///
827/// AlignedAddr: Addr rounded down to a multiple of WordSize
828///
829/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
830/// from AlignedAddr for it to have the same value as if
831/// ValueType was loaded from Addr.
832///
833/// Mask: Value to mask with the value loaded from AlignedAddr to
834/// include only the part that would've been loaded from Addr.
835///
836/// Inv_Mask: The inverse of Mask.
837static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
838 Instruction *I, Type *ValueType,
839 Value *Addr, Align AddrAlign,
840 unsigned MinWordSize) {
841 PartwordMaskValues PMV;
842
843 Module *M = I->getModule();
844 LLVMContext &Ctx = M->getContext();
845 const DataLayout &DL = M->getDataLayout();
846 unsigned ValueSize = DL.getTypeStoreSize(ValueType);
847
848 PMV.ValueType = PMV.IntValueType = ValueType;
849 if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy())
850 PMV.IntValueType =
851 Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());
852
853 PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
854 : ValueType;
855 if (PMV.ValueType == PMV.WordType) {
856 PMV.AlignedAddr = Addr;
857 PMV.AlignedAddrAlignment = AddrAlign;
858 PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
859 PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
860 return PMV;
861 }
862
863 PMV.AlignedAddrAlignment = Align(MinWordSize);
864
865 assert(ValueSize < MinWordSize);
866
867 PointerType *PtrTy = cast<PointerType>(Addr->getType());
868 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
869 Value *PtrLSB;
870
871 if (AddrAlign < MinWordSize) {
872 PMV.AlignedAddr = Builder.CreateIntrinsic(
873 Intrinsic::ptrmask, {PtrTy, IntTy},
874 {Addr, ConstantInt::getSigned(IntTy, ~(uint64_t)(MinWordSize - 1))},
875 nullptr, "AlignedAddr");
876
877 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
878 PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
879 } else {
880 // If the alignment is high enough, the LSBs are known to be 0.
881 PMV.AlignedAddr = Addr;
882 PtrLSB = ConstantInt::getNullValue(IntTy);
883 }
884
885 if (DL.isLittleEndian()) {
886 // turn bytes into bits
887 PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
888 } else {
889 // turn bytes into bits, and count from the other side.
890 PMV.ShiftAmt = Builder.CreateShl(
891 Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
892 }
893
894 PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
895 PMV.Mask = Builder.CreateShl(
896 ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
897 "Mask");
898
899 PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
900
901 return PMV;
902}
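// Worked example (illustrative, assuming a little-endian target with a 32-bit
// minimum cmpxchg width): for an i8 located at byte offset 1 inside its
// containing 32-bit word, createMaskInstrs produces roughly
//   WordType    = i32
//   AlignedAddr = Addr with the low two bits cleared (via llvm.ptrmask)
//   ShiftAmt    = 8          ; byte offset * 8
//   Mask        = 0x0000ff00
//   Inv_Mask    = 0xffff00ff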
903
904static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
905 const PartwordMaskValues &PMV) {
906 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
907 if (PMV.WordType == PMV.ValueType)
908 return WideWord;
909
910 Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
911 Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
912 return Builder.CreateBitCast(Trunc, PMV.ValueType);
913}
914
915static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
916 Value *Updated, const PartwordMaskValues &PMV) {
917 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
918 assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
919 if (PMV.WordType == PMV.ValueType)
920 return Updated;
921
922 Updated = Builder.CreateBitCast(Updated, PMV.IntValueType);
923
924 Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
925 Value *Shift =
926 Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
927 Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
928 Value *Or = Builder.CreateOr(And, Shift, "inserted");
929 return Or;
930}
931
932/// Emit IR to implement a masked version of a given atomicrmw
933/// operation. (That is, only the bits under the Mask should be
934/// affected by the operation)
935static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
936 IRBuilderBase &Builder, Value *Loaded,
937 Value *Shifted_Inc, Value *Inc,
938 const PartwordMaskValues &PMV) {
939 // TODO: update to use
940 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
941 // to merge bits from two values without requiring PMV.Inv_Mask.
942 switch (Op) {
943 case AtomicRMWInst::Xchg: {
944 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
945 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
946 return FinalVal;
947 }
948 case AtomicRMWInst::Or:
949 case AtomicRMWInst::Xor:
950 case AtomicRMWInst::And:
951 llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
952 case AtomicRMWInst::Add:
953 case AtomicRMWInst::Sub:
954 case AtomicRMWInst::Nand: {
955 // The other arithmetic ops need to be masked into place.
956 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
957 Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
958 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
959 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
960 return FinalVal;
961 }
962 case AtomicRMWInst::Max:
963 case AtomicRMWInst::Min:
964 case AtomicRMWInst::UMax:
965 case AtomicRMWInst::UMin:
966 case AtomicRMWInst::FAdd:
967 case AtomicRMWInst::FSub:
968 case AtomicRMWInst::FMin:
969 case AtomicRMWInst::FMax:
970 case AtomicRMWInst::FMaximum:
971 case AtomicRMWInst::FMinimum:
972 case AtomicRMWInst::UIncWrap:
973 case AtomicRMWInst::UDecWrap:
974 case AtomicRMWInst::USubCond:
975 case AtomicRMWInst::USubSat: {
976 // Finally, other ops will operate on the full value, so truncate down to
977 // the original size, and expand out again after doing the
978 // operation. Bitcasts will be inserted for FP values.
979 Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
980 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
981 Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
982 return FinalVal;
983 }
984 default:
985 llvm_unreachable("Unknown atomic op");
986 }
987}
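// For example, a partword "add" (one of the arithmetic cases above) is
// performed on the pre-shifted operand and then merged back under the mask,
// roughly:
//   %new    = add i32 %Loaded, %Shifted_Inc
//   %new.m  = and i32 %new, %Mask        ; keep only the addressed lane
//   %keep   = and i32 %Loaded, %Inv_Mask ; keep the rest of the word
//   %final  = or  i32 %keep, %new.m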
988
989/// Expand a sub-word atomicrmw operation into an appropriate
990/// word-sized operation.
991///
992/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
993/// way as a typical atomicrmw expansion. The only difference here is
994/// that the operation inside of the loop may operate upon only a
995/// part of the value.
996void AtomicExpandImpl::expandPartwordAtomicRMW(
997 AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
998 // Widen And/Or/Xor and give the target another chance at expanding it.
999 AtomicRMWInst::BinOp Op = AI->getOperation();
1000 if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
1001 Op == AtomicRMWInst::And) {
1002 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
1003 return;
1004 }
1005 AtomicOrdering MemOpOrder = AI->getOrdering();
1006 SyncScope::ID SSID = AI->getSyncScopeID();
1007
1008 ReplacementIRBuilder Builder(AI, *DL);
1009
1010 PartwordMaskValues PMV =
1011 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1012 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1013
1014 Value *ValOperand_Shifted = nullptr;
1015 if (Op == AtomicRMWInst::Xchg || Op == AtomicRMWInst::Add ||
1016 Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Nand) {
1017 Value *ValOp = Builder.CreateBitCast(AI->getValOperand(), PMV.IntValueType);
1018 ValOperand_Shifted =
1019 Builder.CreateShl(Builder.CreateZExt(ValOp, PMV.WordType), PMV.ShiftAmt,
1020 "ValOperand_Shifted");
1021 }
1022
1023 auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
1024 return performMaskedAtomicOp(Op, Builder, Loaded, ValOperand_Shifted,
1025 AI->getValOperand(), PMV);
1026 };
1027
1028 Value *OldResult;
1029 if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
1030 OldResult = insertRMWCmpXchgLoop(
1031 Builder, PMV.WordType, PMV.AlignedAddr, PMV.AlignedAddrAlignment,
1032 MemOpOrder, SSID, PerformPartwordOp, createCmpXchgInstFun, AI);
1033 } else {
1034 assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
1035 OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
1036 PMV.AlignedAddrAlignment, MemOpOrder,
1037 PerformPartwordOp);
1038 }
1039
1040 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1041 AI->replaceAllUsesWith(FinalOldResult);
1042 AI->eraseFromParent();
1043}
1044
1045// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
1046AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
1047 ReplacementIRBuilder Builder(AI, *DL);
1048 AtomicRMWInst::BinOp Op = AI->getOperation();
1049
1050 assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
1051 Op == AtomicRMWInst::And) &&
1052 "Unable to widen operation");
1053
1054 PartwordMaskValues PMV =
1055 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1056 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1057
1058 Value *ValOperand_Shifted =
1059 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
1060 PMV.ShiftAmt, "ValOperand_Shifted");
1061
1062 Value *NewOperand;
1063
1064 if (Op == AtomicRMWInst::And)
1065 NewOperand =
1066 Builder.CreateOr(ValOperand_Shifted, PMV.Inv_Mask, "AndOperand");
1067 else
1068 NewOperand = ValOperand_Shifted;
1069
1070 AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
1071 Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
1072 AI->getOrdering(), AI->getSyncScopeID());
1073
1074 copyMetadataForAtomic(*NewAI, *AI);
1075
1076 Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
1077 AI->replaceAllUsesWith(FinalOldResult);
1078 AI->eraseFromParent();
1079 return NewAI;
1080}
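// For illustration (little-endian, 32-bit minimum cmpxchg width),
//   %old = atomicrmw or ptr %p, i8 %v monotonic, align 1
// is widened to roughly
//   %v.ext  = zext i8 %v to i32
//   %v.shl  = shl i32 %v.ext, %ShiftAmt
//   %wide   = atomicrmw or ptr %AlignedAddr, i32 %v.shl monotonic, align 4
//   %old.sh = lshr i32 %wide, %ShiftAmt
//   %old    = trunc i32 %old.sh to i8
// For "and", the shifted operand is additionally or'd with Inv_Mask so the
// other bytes of the word are preserved.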
1081
1082bool AtomicExpandImpl::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
1083 // The basic idea here is that we're expanding a cmpxchg of a
1084 // smaller memory size up to a word-sized cmpxchg. To do this, we
1085 // need to add a retry-loop for strong cmpxchg, so that
1086 // modifications to other parts of the word don't cause a spurious
1087 // failure.
1088
1089 // This generates code like the following:
1090 // [[Setup mask values PMV.*]]
1091 // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
1092 // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
1093 // %InitLoaded = load i32* %addr
1094 // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
1095 // br partword.cmpxchg.loop
1096 // partword.cmpxchg.loop:
1097 // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
1098 // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
1099 // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
1100 // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
1101 // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
1102 // i32 %FullWord_NewVal success_ordering failure_ordering
1103 // %OldVal = extractvalue { i32, i1 } %NewCI, 0
1104 // %Success = extractvalue { i32, i1 } %NewCI, 1
1105 // br i1 %Success, label %partword.cmpxchg.end,
1106 // label %partword.cmpxchg.failure
1107 // partword.cmpxchg.failure:
1108 // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
1109 // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
1110 // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
1111 // label %partword.cmpxchg.end
1112 // partword.cmpxchg.end:
1113 // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
1114 // %FinalOldVal = trunc i32 %tmp1 to i8
1115 // %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
1116 // %Res = insertvalue { i8, i1 } %25, i1 %Success, 1
1117
1118 Value *Addr = CI->getPointerOperand();
1119 Value *Cmp = CI->getCompareOperand();
1120 Value *NewVal = CI->getNewValOperand();
1121
1122 BasicBlock *BB = CI->getParent();
1123 Function *F = BB->getParent();
1124 ReplacementIRBuilder Builder(CI, *DL);
1125 LLVMContext &Ctx = Builder.getContext();
1126
1127 BasicBlock *EndBB =
1128 BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
1129 auto FailureBB =
1130 BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
1131 auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
1132
1133 // The split call above "helpfully" added a branch at the end of BB
1134 // (to the wrong place).
1135 std::prev(BB->end())->eraseFromParent();
1136 Builder.SetInsertPoint(BB);
1137
1138 PartwordMaskValues PMV =
1139 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1140 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1141
1142 // Shift the incoming values over, into the right location in the word.
1143 Value *NewVal_Shifted =
1144 Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
1145 Value *Cmp_Shifted =
1146 Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
1147
1148 // Load the entire current word, and mask into place the expected and new
1149 // values
1150 LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
1151 InitLoaded->setVolatile(CI->isVolatile());
1152 Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
1153 Builder.CreateBr(LoopBB);
1154
1155 // partword.cmpxchg.loop:
1156 Builder.SetInsertPoint(LoopBB);
1157 PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
1158 Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
1159
1160 // Mask/Or the expected and new values into place in the loaded word.
1161 Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
1162 Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
1163 AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
1164 PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
1165 CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID());
1166 NewCI->setVolatile(CI->isVolatile());
1167 // When we're building a strong cmpxchg, we need a loop, so you
1168 // might think we could use a weak cmpxchg inside. But, using strong
1169 // allows the below comparison for ShouldContinue, and we're
1170 // expecting the underlying cmpxchg to be a machine instruction,
1171 // which is strong anyways.
1172 NewCI->setWeak(CI->isWeak());
1173
1174 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1175 Value *Success = Builder.CreateExtractValue(NewCI, 1);
1176
1177 if (CI->isWeak())
1178 Builder.CreateBr(EndBB);
1179 else
1180 Builder.CreateCondBr(Success, EndBB, FailureBB);
1181
1182 // partword.cmpxchg.failure:
1183 Builder.SetInsertPoint(FailureBB);
1184 // Upon failure, verify that the masked-out part of the loaded value
1185 // has been modified. If it hasn't, abort the cmpxchg, since the
1186 // masked-in part must have been.
1187 Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
1188 Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
1189 Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
1190
1191 // Add the second value to the phi from above
1192 Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
1193
1194 // partword.cmpxchg.end:
1195 Builder.SetInsertPoint(CI);
1196
1197 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1198 Value *Res = PoisonValue::get(CI->getType());
1199 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1200 Res = Builder.CreateInsertValue(Res, Success, 1);
1201
1202 CI->replaceAllUsesWith(Res);
1203 CI->eraseFromParent();
1204 return true;
1205}
1206
1207void AtomicExpandImpl::expandAtomicOpToLLSC(
1208 Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
1209 AtomicOrdering MemOpOrder,
1210 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1211 ReplacementIRBuilder Builder(I, *DL);
1212 Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
1213 MemOpOrder, PerformOp);
1214
1215 I->replaceAllUsesWith(Loaded);
1216 I->eraseFromParent();
1217}
1218
1219void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
1220 ReplacementIRBuilder Builder(AI, *DL);
1221
1222 PartwordMaskValues PMV =
1223 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1224 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1225
1226 // The value operand must be sign-extended for signed min/max so that the
1227 // target's signed comparison instructions can be used. Otherwise, just
1228 // zero-ext.
1229 Instruction::CastOps CastOp = Instruction::ZExt;
1230 AtomicRMWInst::BinOp RMWOp = AI->getOperation();
1231 if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
1232 CastOp = Instruction::SExt;
1233
1234 Value *ValOperand_Shifted = Builder.CreateShl(
1235 Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
1236 PMV.ShiftAmt, "ValOperand_Shifted");
1237 Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
1238 Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
1239 AI->getOrdering());
1240 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1241 AI->replaceAllUsesWith(FinalOldResult);
1242 AI->eraseFromParent();
1243}
1244
1245void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic(
1246 AtomicCmpXchgInst *CI) {
1247 ReplacementIRBuilder Builder(CI, *DL);
1248
1249 PartwordMaskValues PMV = createMaskInstrs(
1250 Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
1251 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1252
1253 Value *CmpVal_Shifted = Builder.CreateShl(
1254 Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
1255 "CmpVal_Shifted");
1256 Value *NewVal_Shifted = Builder.CreateShl(
1257 Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
1258 "NewVal_Shifted");
1259 Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
1260 Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
1261 CI->getMergedOrdering());
1262 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1263 Value *Res = PoisonValue::get(CI->getType());
1264 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1265 Value *Success = Builder.CreateICmpEQ(
1266 CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
1267 Res = Builder.CreateInsertValue(Res, Success, 1);
1268
1269 CI->replaceAllUsesWith(Res);
1270 CI->eraseFromParent();
1271}
1272
1273Value *AtomicExpandImpl::insertRMWLLSCLoop(
1274 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1275 AtomicOrdering MemOpOrder,
1276 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1277 LLVMContext &Ctx = Builder.getContext();
1278 BasicBlock *BB = Builder.GetInsertBlock();
1279 Function *F = BB->getParent();
1280
1281 assert(AddrAlign >= F->getDataLayout().getTypeStoreSize(ResultTy) &&
1282 "Expected at least natural alignment at this point.");
1283
1284 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1285 //
1286 // The standard expansion we produce is:
1287 // [...]
1288 // atomicrmw.start:
1289 // %loaded = @load.linked(%addr)
1290 // %new = some_op iN %loaded, %incr
1291 // %stored = @store_conditional(%new, %addr)
1292 // %try_again = icmp i32 ne %stored, 0
1293 // br i1 %try_again, label %loop, label %atomicrmw.end
1294 // atomicrmw.end:
1295 // [...]
1296 BasicBlock *ExitBB =
1297 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1298 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1299
1300 // The split call above "helpfully" added a branch at the end of BB (to the
1301 // wrong place).
1302 std::prev(BB->end())->eraseFromParent();
1303 Builder.SetInsertPoint(BB);
1304 Builder.CreateBr(LoopBB);
1305
1306 // Start the main loop block now that we've taken care of the preliminaries.
1307 Builder.SetInsertPoint(LoopBB);
1308 Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);
1309
1310 Value *NewVal = PerformOp(Builder, Loaded);
1311
1312 Value *StoreSuccess =
1313 TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
1314 Value *TryAgain = Builder.CreateICmpNE(
1315 StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
1316
1317 Instruction *CondBr = Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
1318
1319 // Atomic RMW expands to a Load-linked / Store-Conditional loop. Because it
1320 // is hard to predict precise branch weights, we mark the branch as "unknown"
1321 // (50/50) to prevent misleading optimizations.
1323
1324 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1325 return Loaded;
1326}
1327
1328/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
1329/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
1330/// IR. As a migration step, we convert back to what used to be the standard
1331/// way to represent a pointer cmpxchg so that we can update backends one by
1332/// one.
1333AtomicCmpXchgInst *
1334AtomicExpandImpl::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1335 auto *M = CI->getModule();
1336 Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
1337 M->getDataLayout());
1338
1339 ReplacementIRBuilder Builder(CI, *DL);
1340
1341 Value *Addr = CI->getPointerOperand();
1342
1343 Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
1344 Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
1345
1346 auto *NewCI = Builder.CreateAtomicCmpXchg(
1347 Addr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
1348 CI->getFailureOrdering(), CI->getSyncScopeID());
1349 NewCI->setVolatile(CI->isVolatile());
1350 NewCI->setWeak(CI->isWeak());
1351 LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1352
1353 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1354 Value *Succ = Builder.CreateExtractValue(NewCI, 1);
1355
1356 OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
1357
1358 Value *Res = PoisonValue::get(CI->getType());
1359 Res = Builder.CreateInsertValue(Res, OldVal, 0);
1360 Res = Builder.CreateInsertValue(Res, Succ, 1);
1361
1362 CI->replaceAllUsesWith(Res);
1363 CI->eraseFromParent();
1364 return NewCI;
1365}
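// For illustration (assuming 64-bit pointers), the conversion above turns
//   %res = cmpxchg ptr %p, ptr %cmp, ptr %new seq_cst seq_cst, align 8
// into roughly
//   %cmp.int = ptrtoint ptr %cmp to i64
//   %new.int = ptrtoint ptr %new to i64
//   %r       = cmpxchg ptr %p, i64 %cmp.int, i64 %new.int seq_cst seq_cst, align 8
//   %old.int = extractvalue { i64, i1 } %r, 0
//   %old     = inttoptr i64 %old.int to ptr
// and the { ptr, i1 } result is reassembled from %old and the success bit.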
1366
1367bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1368 AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
1369 AtomicOrdering FailureOrder = CI->getFailureOrdering();
1370 Value *Addr = CI->getPointerOperand();
1371 BasicBlock *BB = CI->getParent();
1372 Function *F = BB->getParent();
1373 LLVMContext &Ctx = F->getContext();
1374 // If shouldInsertFencesForAtomic() returns true, then the target does not
1375 // want to deal with memory orders, and emitLeading/TrailingFence should take
1376 // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
1377 // should preserve the ordering.
1378 bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
1379 AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
1380 ? AtomicOrdering::Monotonic
1381 : CI->getMergedOrdering();
1382
1383 // In implementations which use a barrier to achieve release semantics, we can
1384 // delay emitting this barrier until we know a store is actually going to be
1385 // attempted. The cost of this delay is that we need 2 copies of the block
1386 // emitting the load-linked, affecting code size.
1387 //
1388 // Ideally, this logic would be unconditional except for the minsize check
1389 // since in other cases the extra blocks naturally collapse down to the
1390 // minimal loop. Unfortunately, this puts too much stress on later
1391 // optimisations so we avoid emitting the extra logic in those cases too.
1392 bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
1393 SuccessOrder != AtomicOrdering::Monotonic &&
1394 SuccessOrder != AtomicOrdering::Acquire &&
1395 !F->hasMinSize();
1396
1397 // There's no overhead for sinking the release barrier in a weak cmpxchg, so
1398 // do it even on minsize.
1399 bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();
1400
1401 // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
1402 //
1403 // The full expansion we produce is:
1404 // [...]
1405 // %aligned.addr = ...
1406 // cmpxchg.start:
1407 // %unreleasedload = @load.linked(%aligned.addr)
1408 // %unreleasedload.extract = extract value from %unreleasedload
1409 // %should_store = icmp eq %unreleasedload.extract, %desired
1410 // br i1 %should_store, label %cmpxchg.releasingstore,
1411 // label %cmpxchg.nostore
1412 // cmpxchg.releasingstore:
1413 // fence?
1414 // br label cmpxchg.trystore
1415 // cmpxchg.trystore:
1416 // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
1417 // [%releasedload, %cmpxchg.releasedload]
1418 // %updated.new = insert %new into %loaded.trystore
1419 // %stored = @store_conditional(%updated.new, %aligned.addr)
1420 // %success = icmp eq i32 %stored, 0
1421 // br i1 %success, label %cmpxchg.success,
1422 // label %cmpxchg.releasedload/%cmpxchg.failure
1423 // cmpxchg.releasedload:
1424 // %releasedload = @load.linked(%aligned.addr)
1425 // %releasedload.extract = extract value from %releasedload
1426 // %should_store = icmp eq %releasedload.extract, %desired
1427 // br i1 %should_store, label %cmpxchg.trystore,
1428 // label %cmpxchg.failure
1429 // cmpxchg.success:
1430 // fence?
1431 // br label %cmpxchg.end
1432 // cmpxchg.nostore:
1433 // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
1434 // [%releasedload,
1435 // %cmpxchg.releasedload/%cmpxchg.trystore]
1436 // @load_linked_fail_balance()?
1437 // br label %cmpxchg.failure
1438 // cmpxchg.failure:
1439 // fence?
1440 // br label %cmpxchg.end
1441 // cmpxchg.end:
1442 // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
1443 // [%loaded.trystore, %cmpxchg.trystore]
1444 // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
1445 // %loaded = extract value from %loaded.exit
1446 // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
1447 // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
1448 // [...]
1449 BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
1450 auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
1451 auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
1452 auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
1453 auto ReleasedLoadBB =
1454 BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
1455 auto TryStoreBB =
1456 BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
1457 auto ReleasingStoreBB =
1458 BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
1459 auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
1460
1461 ReplacementIRBuilder Builder(CI, *DL);
1462
1463 // The split call above "helpfully" added a branch at the end of BB (to the
1464 // wrong place), but we might want a fence too. It's easiest to just remove
1465 // the branch entirely.
1466 std::prev(BB->end())->eraseFromParent();
1467 Builder.SetInsertPoint(BB);
1468 if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1469 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1470
1471 PartwordMaskValues PMV =
1472 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1473 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1474 Builder.CreateBr(StartBB);
1475
1476 // Start the main loop block now that we've taken care of the preliminaries.
1477 Builder.SetInsertPoint(StartBB);
1478 Value *UnreleasedLoad =
1479 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1480 Value *UnreleasedLoadExtract =
1481 extractMaskedValue(Builder, UnreleasedLoad, PMV);
1482 Value *ShouldStore = Builder.CreateICmpEQ(
1483 UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");
1484
1485 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1486 // jump straight past that fence instruction (if it exists).
1487 Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB,
1488 MDBuilder(F->getContext()).createLikelyBranchWeights());
1489
1490 Builder.SetInsertPoint(ReleasingStoreBB);
1491 if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1492 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1493 Builder.CreateBr(TryStoreBB);
1494
1495 Builder.SetInsertPoint(TryStoreBB);
1496 PHINode *LoadedTryStore =
1497 Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
1498 LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
1499 Value *NewValueInsert =
1500 insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
1501 Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
1502 PMV.AlignedAddr, MemOpOrder);
1503 StoreSuccess = Builder.CreateICmpEQ(
1504 StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
1505 BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1506 Builder.CreateCondBr(StoreSuccess, SuccessBB,
1507 CI->isWeak() ? FailureBB : RetryBB,
1508 MDBuilder(F->getContext()).createLikelyBranchWeights());
1509
1510 Builder.SetInsertPoint(ReleasedLoadBB);
1511 Value *SecondLoad;
1512 if (HasReleasedLoadBB) {
1513 SecondLoad =
1514 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1515 Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
1516 ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
1517 CI->getCompareOperand(), "should_store");
1518
1519 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1520 // jump straight past that fence instruction (if it exists).
1521 Builder.CreateCondBr(
1522 ShouldStore, TryStoreBB, NoStoreBB,
1523 MDBuilder(F->getContext()).createLikelyBranchWeights());
1524 // Update PHI node in TryStoreBB.
1525 LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
1526 } else
1527 Builder.CreateUnreachable();
1528
1529 // Make sure later instructions don't get reordered with a fence if
1530 // necessary.
1531 Builder.SetInsertPoint(SuccessBB);
1532 if (ShouldInsertFencesForAtomic ||
1533 TLI->shouldInsertTrailingFenceForAtomicStore(CI))
1534 TLI->emitTrailingFence(Builder, CI, SuccessOrder);
1535 Builder.CreateBr(ExitBB);
1536
1537 Builder.SetInsertPoint(NoStoreBB);
1538 PHINode *LoadedNoStore =
1539 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
1540 LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
1541 if (HasReleasedLoadBB)
1542 LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);
1543
1544 // In the failing case, where we don't execute the store-conditional, the
1545 // target might want to balance out the load-linked with a dedicated
1546 // instruction (e.g., on ARM, clearing the exclusive monitor).
1547  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
1548  Builder.CreateBr(FailureBB);
1549
1550 Builder.SetInsertPoint(FailureBB);
1551 PHINode *LoadedFailure =
1552 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
1553 LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
1554 if (CI->isWeak())
1555 LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
1556 if (ShouldInsertFencesForAtomic)
1557 TLI->emitTrailingFence(Builder, CI, FailureOrder);
1558 Builder.CreateBr(ExitBB);
1559
1560 // Finally, we have control-flow based knowledge of whether the cmpxchg
1561 // succeeded or not. We expose this to later passes by converting any
1562 // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
1563 // PHI.
1564 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1565 PHINode *LoadedExit =
1566 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
1567 LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
1568 LoadedExit->addIncoming(LoadedFailure, FailureBB);
1569 PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
1570 Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
1571 Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
1572
1573 // This is the "exit value" from the cmpxchg expansion. It may be of
1574 // a type wider than the one in the cmpxchg instruction.
1575 Value *LoadedFull = LoadedExit;
1576
1577 Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
1578 Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);
1579
1580 // Look for any users of the cmpxchg that are just comparing the loaded value
1581 // against the desired one, and replace them with the CFG-derived version.
1582  SmallVector<ExtractValueInst *, 2> PrunedInsts;
1583  for (auto *User : CI->users()) {
1584 ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
1585 if (!EV)
1586 continue;
1587
1588 assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
1589 "weird extraction from { iN, i1 }");
1590
1591 if (EV->getIndices()[0] == 0)
1592 EV->replaceAllUsesWith(Loaded);
1593 else
1594      EV->replaceAllUsesWith(Success);
1595
1596 PrunedInsts.push_back(EV);
1597 }
1598
1599 // We can remove the instructions now we're no longer iterating through them.
1600 for (auto *EV : PrunedInsts)
1601 EV->eraseFromParent();
1602
1603 if (!CI->use_empty()) {
1604 // Some use of the full struct return that we don't understand has happened,
1605 // so we've got to reconstruct it properly.
1606 Value *Res;
1607 Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0);
1608 Res = Builder.CreateInsertValue(Res, Success, 1);
1609
1610 CI->replaceAllUsesWith(Res);
1611 }
1612
1613 CI->eraseFromParent();
1614 return true;
1615}
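// For example, a user of the original cmpxchg such as
//   %ok = extractvalue { i32, i1 } %pair, 1
// is redirected above to the CFG-derived "success" PHI, and an index-0
// extraction to the value recovered from the "loaded.exit" PHI, so later
// passes see the outcome of the operation directly from control flow.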
1616
1617bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
1618 // TODO: Add floating point support.
1619 auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
1620 if (!C)
1621 return false;
1622
1623 switch (RMWI->getOperation()) {
1624 case AtomicRMWInst::Add:
1625 case AtomicRMWInst::Sub:
1626 case AtomicRMWInst::Or:
1627 case AtomicRMWInst::Xor:
1628 return C->isZero();
1629 case AtomicRMWInst::And:
1630 return C->isMinusOne();
1631 case AtomicRMWInst::Min:
1632 return C->isMaxValue(true);
1633 case AtomicRMWInst::Max:
1634 return C->isMinValue(true);
1635  case AtomicRMWInst::UMin:
1636    return C->isMaxValue(false);
1637  case AtomicRMWInst::UMax:
1638    return C->isMinValue(false);
1639 default:
1640 return false;
1641 }
1642}
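// For example, `atomicrmw add ptr %p, i32 0` and `atomicrmw and ptr %p, i32 -1`
// never modify memory; simplifyIdempotentRMW below gives the target a chance to
// rewrite such operations as a fenced atomic load instead.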
1643
1644bool AtomicExpandImpl::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
1645 if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1646 tryExpandAtomicLoad(ResultingLoad);
1647 return true;
1648 }
1649 return false;
1650}
1651
1652Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
1653 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1654 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
1655 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
1656 CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc) {
1657 LLVMContext &Ctx = Builder.getContext();
1658 BasicBlock *BB = Builder.GetInsertBlock();
1659 Function *F = BB->getParent();
1660
1661 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1662 //
1663 // The standard expansion we produce is:
1664 // [...]
1665 // %init_loaded = load atomic iN* %addr
1666 // br label %loop
1667 // loop:
1668 // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1669 // %new = some_op iN %loaded, %incr
1670 // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1671 // %new_loaded = extractvalue { iN, i1 } %pair, 0
1672 // %success = extractvalue { iN, i1 } %pair, 1
1673 // br i1 %success, label %atomicrmw.end, label %loop
1674 // atomicrmw.end:
1675 // [...]
1676 BasicBlock *ExitBB =
1677 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1678 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1679
1680 // The split call above "helpfully" added a branch at the end of BB (to the
1681 // wrong place), but we want a load. It's easiest to just remove
1682 // the branch entirely.
1683 std::prev(BB->end())->eraseFromParent();
1684 Builder.SetInsertPoint(BB);
1685 LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
1686 Builder.CreateBr(LoopBB);
1687
1688 // Start the main loop block now that we've taken care of the preliminaries.
1689 Builder.SetInsertPoint(LoopBB);
1690 PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
1691 Loaded->addIncoming(InitLoaded, BB);
1692
1693 Value *NewVal = PerformOp(Builder, Loaded);
1694
1695 Value *NewLoaded = nullptr;
1696 Value *Success = nullptr;
1697
1698 CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
1699 MemOpOrder == AtomicOrdering::Unordered
1700 ? AtomicOrdering::Monotonic
1701 : MemOpOrder,
1702 SSID, Success, NewLoaded, MetadataSrc);
1703 assert(Success && NewLoaded);
1704
1705 Loaded->addIncoming(NewLoaded, LoopBB);
1706
1707 Instruction *CondBr = Builder.CreateCondBr(Success, ExitBB, LoopBB);
1708
1709  // Atomic RMW expands to a cmpxchg loop. Since precise branch weights
1710 // cannot be easily determined here, we mark the branch as "unknown" (50/50)
1711 // to prevent misleading optimizations.
1712  setExplicitlyUnknownBranchWeightsIfProfiled(*CondBr, DEBUG_TYPE, F);
1713
1714 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1715 return NewLoaded;
1716}
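// As a concrete instance, `atomicrmw nand ptr %p, i32 %v seq_cst` expanded this
// way computes in PerformOp
//   %tmp = and i32 %loaded, %v
//   %new = xor i32 %tmp, -1
// and then retries the seq_cst cmpxchg until it reports success.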
1717
1718bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1719 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1720 unsigned ValueSize = getAtomicOpSize(CI);
1721
1722 switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1723 default:
1724 llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
1725 case TargetLoweringBase::AtomicExpansionKind::None:
1726 if (ValueSize < MinCASSize)
1727 return expandPartwordCmpXchg(CI);
1728 return false;
1729 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
1730 return expandAtomicCmpXchg(CI);
1731 }
1732 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
1733 expandAtomicCmpXchgToMaskedIntrinsic(CI);
1734 return true;
1735 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
1736 return lowerAtomicCmpXchgInst(CI);
1737 case TargetLoweringBase::AtomicExpansionKind::CustomExpand: {
1738 TLI->emitExpandAtomicCmpXchg(CI);
1739 return true;
1740 }
1741 }
1742}
1743
1744// Note: This function is exposed externally by AtomicExpandUtils.h
1745bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
1746                                    CreateCmpXchgInstFun CreateCmpXchg) {
1747 ReplacementIRBuilder Builder(AI, AI->getDataLayout());
1748 Builder.setIsFPConstrained(
1749 AI->getFunction()->hasFnAttribute(Attribute::StrictFP));
1750
1751 // FIXME: If FP exceptions are observable, we should force them off for the
1752 // loop for the FP atomics.
1753 Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
1754 Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
1755 AI->getOrdering(), AI->getSyncScopeID(),
1756 [&](IRBuilderBase &Builder, Value *Loaded) {
1757 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
1758 AI->getValOperand());
1759 },
1760 CreateCmpXchg, /*MetadataSrc=*/AI);
1761
1762 AI->replaceAllUsesWith(Loaded);
1763 AI->eraseFromParent();
1764 return true;
1765}
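// This helper is used both when a target requests a cmpxchg-based expansion
// from shouldExpandAtomicRMWInIR() and, further below, as the fallback when no
// fetch-and-op libcall exists; it only requires a working cmpxchg, not a native
// form of the RMW operation itself.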
1766
1767// In order to use one of the sized library calls such as
1768// __atomic_fetch_add_4, the alignment must be sufficient, the size
1769// must be one of the potentially-specialized sizes, and the value
1770// type must actually exist in C on the target (otherwise, the
1771// function wouldn't actually be defined.)
1772static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
1773 const DataLayout &DL) {
1774 // TODO: "LargestSize" is an approximation for "largest type that
1775 // you can express in C". It seems to be the case that int128 is
1776 // supported on all 64-bit platforms, otherwise only up to 64-bit
1777 // integers are supported. If we get this wrong, then we'll try to
1778 // call a sized libcall that doesn't actually exist. There should
1779 // really be some more reliable way in LLVM of determining integer
1780 // sizes which are valid in the target's C ABI...
1781 unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1782 return Alignment >= Size &&
1783 (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1784 Size <= LargestSize;
1785}
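// For example, a naturally aligned 4-byte access qualifies for __atomic_load_4 /
// __atomic_fetch_add_4, while a 4-byte access with only 2-byte alignment, or any
// size outside {1,2,4,8,16}, must use the generic __atomic_* entry points that
// take an explicit size_t argument.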
1786
1787void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) {
1788 static const RTLIB::Libcall Libcalls[6] = {
1789 RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1790 RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1791 unsigned Size = getAtomicOpSize(I);
1792
1793 bool expanded = expandAtomicOpToLibcall(
1794 I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
1795 I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1796 if (!expanded)
1797 handleFailure(*I, "unsupported atomic load");
1798}
1799
1800void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
1801 static const RTLIB::Libcall Libcalls[6] = {
1802 RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1803 RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1804 unsigned Size = getAtomicOpSize(I);
1805
1806 bool expanded = expandAtomicOpToLibcall(
1807 I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
1808 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1809 if (!expanded)
1810 handleFailure(*I, "unsupported atomic store");
1811}
1812
1813void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
1814 static const RTLIB::Libcall Libcalls[6] = {
1815 RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1816 RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1817 RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1818 unsigned Size = getAtomicOpSize(I);
1819
1820 bool expanded = expandAtomicOpToLibcall(
1821 I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
1822 I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
1823 Libcalls);
1824 if (!expanded)
1825 handleFailure(*I, "unsupported cmpxchg");
1826}
1827
1828static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
1829  static const RTLIB::Libcall LibcallsXchg[6] = {
1830 RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1831 RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1832 RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1833 static const RTLIB::Libcall LibcallsAdd[6] = {
1834 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1835 RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1836 RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1837 static const RTLIB::Libcall LibcallsSub[6] = {
1838 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1839 RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1840 RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1841 static const RTLIB::Libcall LibcallsAnd[6] = {
1842 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1843 RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1844 RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1845 static const RTLIB::Libcall LibcallsOr[6] = {
1846 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1847 RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1848 RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1849 static const RTLIB::Libcall LibcallsXor[6] = {
1850 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1851 RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1852 RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1853 static const RTLIB::Libcall LibcallsNand[6] = {
1854 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1855 RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1856 RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1857
1858 switch (Op) {
1859  case AtomicRMWInst::BAD_BINOP:
1860    llvm_unreachable("Should not have BAD_BINOP.");
1861  case AtomicRMWInst::Xchg:
1862    return ArrayRef(LibcallsXchg);
1863 case AtomicRMWInst::Add:
1864 return ArrayRef(LibcallsAdd);
1865 case AtomicRMWInst::Sub:
1866 return ArrayRef(LibcallsSub);
1867 case AtomicRMWInst::And:
1868 return ArrayRef(LibcallsAnd);
1869 case AtomicRMWInst::Or:
1870 return ArrayRef(LibcallsOr);
1871 case AtomicRMWInst::Xor:
1872 return ArrayRef(LibcallsXor);
1873  case AtomicRMWInst::Nand:
1874    return ArrayRef(LibcallsNand);
1875 case AtomicRMWInst::Max:
1876 case AtomicRMWInst::Min:
1877  case AtomicRMWInst::UMax:
1878  case AtomicRMWInst::UMin:
1879  case AtomicRMWInst::FMax:
1880  case AtomicRMWInst::FMin:
1881  case AtomicRMWInst::FMaximum:
1882  case AtomicRMWInst::FMinimum:
1883  case AtomicRMWInst::FAdd:
1884  case AtomicRMWInst::FSub:
1885  case AtomicRMWInst::UIncWrap:
1886  case AtomicRMWInst::UDecWrap:
1887  case AtomicRMWInst::USubCond:
1888  case AtomicRMWInst::USubSat:
1889    // No atomic libcalls are available for these.
1890 return {};
1891 }
1892 llvm_unreachable("Unexpected AtomicRMW operation.");
1893}
1894
1895void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
1896 ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
1897
1898 unsigned Size = getAtomicOpSize(I);
1899
1900 bool Success = false;
1901 if (!Libcalls.empty())
1902 Success = expandAtomicOpToLibcall(
1903 I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
1904 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1905
1906 // The expansion failed: either there were no libcalls at all for
1907 // the operation (min/max), or there were only size-specialized
1908 // libcalls (add/sub/etc) and we needed a generic. So, expand to a
1909 // CAS libcall, via a CAS loop, instead.
1910 if (!Success) {
1911    expandAtomicRMWToCmpXchg(
1912        I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
1913 Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
1914 SyncScope::ID SSID, Value *&Success, Value *&NewLoaded,
1915 Instruction *MetadataSrc) {
1916 // Create the CAS instruction normally...
1917 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
1918 Addr, Loaded, NewVal, Alignment, MemOpOrder,
1919        AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
1920    if (MetadataSrc)
1921 copyMetadataForAtomic(*Pair, *MetadataSrc);
1922
1923 Success = Builder.CreateExtractValue(Pair, 1, "success");
1924 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
1925
1926 // ...and then expand the CAS into a libcall.
1927 expandAtomicCASToLibcall(Pair);
1928 });
1929 }
1930}
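// For example, `atomicrmw max` has no __atomic_fetch_max_* libcall, so it is
// expanded here into a cmpxchg loop whose compare-exchange is then lowered to
// __atomic_compare_exchange_N (or the generic __atomic_compare_exchange).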
1931
1932// A helper routine for the above expandAtomic*ToLibcall functions.
1933//
1934// 'Libcalls' contains an array of enum values for the particular
1935// ATOMIC libcalls to be emitted. All of the other arguments besides
1936// 'I' are extracted from the Instruction subclass by the
1937// caller. Depending on the particular call, some will be null.
1938bool AtomicExpandImpl::expandAtomicOpToLibcall(
1939 Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
1940 Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
1941 AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
1942 assert(Libcalls.size() == 6);
1943
1944 LLVMContext &Ctx = I->getContext();
1945 Module *M = I->getModule();
1946 const DataLayout &DL = M->getDataLayout();
1947 IRBuilder<> Builder(I);
1948 IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
1949
1950 bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
1951 Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
1952
1953 const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
1954
1955 // TODO: the "order" argument type is "int", not int32. So
1956 // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
1957 assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
1958 Constant *OrderingVal =
1959 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
1960 Constant *Ordering2Val = nullptr;
1961 if (CASExpected) {
1962 assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
1963 Ordering2Val =
1964 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
1965 }
1966 bool HasResult = I->getType() != Type::getVoidTy(Ctx);
1967
1968 RTLIB::Libcall RTLibType;
1969 if (UseSizedLibcall) {
1970 switch (Size) {
1971 case 1:
1972 RTLibType = Libcalls[1];
1973 break;
1974 case 2:
1975 RTLibType = Libcalls[2];
1976 break;
1977 case 4:
1978 RTLibType = Libcalls[3];
1979 break;
1980 case 8:
1981 RTLibType = Libcalls[4];
1982 break;
1983 case 16:
1984 RTLibType = Libcalls[5];
1985 break;
1986 }
1987 } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
1988 RTLibType = Libcalls[0];
1989 } else {
1990 // Can't use sized function, and there's no generic for this
1991 // operation, so give up.
1992 return false;
1993 }
1994
1995 RTLIB::LibcallImpl LibcallImpl = LibcallLowering->getLibcallImpl(RTLibType);
1996 if (LibcallImpl == RTLIB::Unsupported) {
1997 // This target does not implement the requested atomic libcall so give up.
1998 return false;
1999 }
2000
2001  // Build up the function call. There are two kinds. First, the sized
2002 // variants. These calls are going to be one of the following (with
2003 // N=1,2,4,8,16):
2004 // iN __atomic_load_N(iN *ptr, int ordering)
2005 // void __atomic_store_N(iN *ptr, iN val, int ordering)
2006 // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
2007 // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
2008 // int success_order, int failure_order)
2009 //
2010 // Note that these functions can be used for non-integer atomic
2011  // operations; the values just need to be bitcast to integers on the
2012 // way in and out.
2013 //
2014 // And, then, the generic variants. They look like the following:
2015 // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
2016 // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
2017 // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
2018 // int ordering)
2019 // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
2020 // void *desired, int success_order,
2021 // int failure_order)
2022 //
2023 // The different signatures are built up depending on the
2024 // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
2025 // variables.
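//
// For example, a seq_cst i64 exchange that passes canUseSizedAtomicCall becomes
// roughly:
//   %old = call i64 @__atomic_exchange_8(ptr %p, i64 %v, i32 5)
// where 5 is the C ABI encoding of seq_cst; an unsupported size or alignment
// instead spills the value and result through temporary allocas and calls the
// generic __atomic_exchange.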
2026
2027 AllocaInst *AllocaCASExpected = nullptr;
2028 AllocaInst *AllocaValue = nullptr;
2029 AllocaInst *AllocaResult = nullptr;
2030
2031 Type *ResultTy;
2032  SmallVector<Value *, 6> Args;
2033  AttributeList Attr;
2034
2035 // 'size' argument.
2036 if (!UseSizedLibcall) {
2037 // Note, getIntPtrType is assumed equivalent to size_t.
2038 Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
2039 }
2040
2041 // 'ptr' argument.
2042  // Note: This assumes all address spaces share a common libfunc
2043  // implementation and that addresses are convertible. For systems without
2044 // that property, we'd need to extend this mechanism to support AS-specific
2045 // families of atomic intrinsics.
2046 Value *PtrVal = PointerOperand;
2047 PtrVal = Builder.CreateAddrSpaceCast(PtrVal, PointerType::getUnqual(Ctx));
2048 Args.push_back(PtrVal);
2049
2050 // 'expected' argument, if present.
2051 if (CASExpected) {
2052 AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
2053 AllocaCASExpected->setAlignment(AllocaAlignment);
2054 Builder.CreateLifetimeStart(AllocaCASExpected);
2055 Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
2056 Args.push_back(AllocaCASExpected);
2057 }
2058
2059 // 'val' argument ('desired' for cas), if present.
2060 if (ValueOperand) {
2061 if (UseSizedLibcall) {
2062 Value *IntValue =
2063 Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
2064 Args.push_back(IntValue);
2065 } else {
2066 AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
2067 AllocaValue->setAlignment(AllocaAlignment);
2068 Builder.CreateLifetimeStart(AllocaValue);
2069 Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
2070 Args.push_back(AllocaValue);
2071 }
2072 }
2073
2074 // 'ret' argument.
2075 if (!CASExpected && HasResult && !UseSizedLibcall) {
2076 AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
2077 AllocaResult->setAlignment(AllocaAlignment);
2078 Builder.CreateLifetimeStart(AllocaResult);
2079 Args.push_back(AllocaResult);
2080 }
2081
2082 // 'ordering' ('success_order' for cas) argument.
2083 Args.push_back(OrderingVal);
2084
2085 // 'failure_order' argument, if present.
2086 if (Ordering2Val)
2087 Args.push_back(Ordering2Val);
2088
2089 // Now, the return type.
2090 if (CASExpected) {
2091 ResultTy = Type::getInt1Ty(Ctx);
2092 Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
2093 } else if (HasResult && UseSizedLibcall)
2094 ResultTy = SizedIntTy;
2095 else
2096 ResultTy = Type::getVoidTy(Ctx);
2097
2098 // Done with setting up arguments and return types, create the call:
2099  SmallVector<Type *, 6> ArgTys;
2100  for (Value *Arg : Args)
2101 ArgTys.push_back(Arg->getType());
2102 FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
2103 FunctionCallee LibcallFn = M->getOrInsertFunction(
2104      RTLIB::RuntimeLibcallsInfo::getLibcallImplName(LibcallImpl), FnType,
2105      Attr);
2106 CallInst *Call = Builder.CreateCall(LibcallFn, Args);
2107 Call->setAttributes(Attr);
2108 Value *Result = Call;
2109
2110 // And then, extract the results...
2111 if (ValueOperand && !UseSizedLibcall)
2112 Builder.CreateLifetimeEnd(AllocaValue);
2113
2114 if (CASExpected) {
2115 // The final result from the CAS is {load of 'expected' alloca, bool result
2116 // from call}
2117 Type *FinalResultTy = I->getType();
2118 Value *V = PoisonValue::get(FinalResultTy);
2119 Value *ExpectedOut = Builder.CreateAlignedLoad(
2120 CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
2121 Builder.CreateLifetimeEnd(AllocaCASExpected);
2122 V = Builder.CreateInsertValue(V, ExpectedOut, 0);
2123 V = Builder.CreateInsertValue(V, Result, 1);
2124    I->replaceAllUsesWith(V);
2125  } else if (HasResult) {
2126 Value *V;
2127 if (UseSizedLibcall)
2128 V = Builder.CreateBitOrPointerCast(Result, I->getType());
2129 else {
2130 V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
2131 AllocaAlignment);
2132 Builder.CreateLifetimeEnd(AllocaResult);
2133 }
2134    I->replaceAllUsesWith(V);
2135  }
2136 I->eraseFromParent();
2137 return true;
2138}