1//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass (at IR level) to replace atomic instructions with
10// __atomic_* library calls, or target-specific instructions which implement the
11// same semantics in a way that better fits the target backend. This can
12// include the use of (intrinsic-based) load-linked/store-conditional loops,
13// AtomicCmpXchg, or type coercions.
14//
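// As a rough illustration only (a sketch; the exact output depends on the
// target's answers to the shouldExpand* hooks used below), an
//   %old = atomicrmw nand ptr %p, i32 %v seq_cst
// on a target with no native nand may become a compare-exchange loop:
//   atomicrmw.start:
//     %loaded = phi i32 [ %init, %entry ], [ %newloaded, %atomicrmw.start ]
//     %tmp = and i32 %loaded, %v
//     %new = xor i32 %tmp, -1
//     %pair = cmpxchg ptr %p, i32 %loaded, i32 %new seq_cst seq_cst, align 4
//     %newloaded = extractvalue { i32, i1 } %pair, 0
//     %success = extractvalue { i32, i1 } %pair, 1
//     br i1 %success, label %atomicrmw.end, label %atomicrmw.start
//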
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/ArrayRef.h"
28#include "llvm/IR/Attributes.h"
29#include "llvm/IR/BasicBlock.h"
30#include "llvm/IR/Constant.h"
31#include "llvm/IR/Constants.h"
32#include "llvm/IR/DataLayout.h"
34#include "llvm/IR/Function.h"
35#include "llvm/IR/IRBuilder.h"
36#include "llvm/IR/Instruction.h"
38#include "llvm/IR/MDBuilder.h"
40#include "llvm/IR/Module.h"
42#include "llvm/IR/Type.h"
43#include "llvm/IR/User.h"
44#include "llvm/IR/Value.h"
46#include "llvm/Pass.h"
49#include "llvm/Support/Debug.h"
54#include <cassert>
55#include <cstdint>
56#include <iterator>
57
58using namespace llvm;
59
60#define DEBUG_TYPE "atomic-expand"
61
62namespace {
63
64class AtomicExpandImpl {
65 const TargetLowering *TLI = nullptr;
66 const LibcallLoweringInfo *LibcallLowering = nullptr;
67 const DataLayout *DL = nullptr;
68
69private:
70 void handleFailure(Instruction &FailedInst, const Twine &Msg) const {
71 LLVMContext &Ctx = FailedInst.getContext();
72
73 // TODO: Do not use generic error type.
74 Ctx.emitError(&FailedInst, Msg);
75
76 if (!FailedInst.getType()->isVoidTy())
77 FailedInst.replaceAllUsesWith(PoisonValue::get(FailedInst.getType()));
78 FailedInst.eraseFromParent();
79 }
80
81 bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
82 IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
83 LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
84 bool tryExpandAtomicLoad(LoadInst *LI);
85 bool expandAtomicLoadToLL(LoadInst *LI);
86 bool expandAtomicLoadToCmpXchg(LoadInst *LI);
87 StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
88 bool tryExpandAtomicStore(StoreInst *SI);
89 void expandAtomicStoreToXChg(StoreInst *SI);
90 bool tryExpandAtomicRMW(AtomicRMWInst *AI);
91 AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
92 Value *
93 insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
94 Align AddrAlign, AtomicOrdering MemOpOrder,
95 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
96 void expandAtomicOpToLLSC(
97 Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
98 AtomicOrdering MemOpOrder,
99 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
100 void expandPartwordAtomicRMW(
101 AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind);
102 AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
103 bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
104 void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
105 void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
106
107 AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
108 static Value *insertRMWCmpXchgLoop(
109 IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
110 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
111 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
112 CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc);
113 bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
114
115 bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
116 bool isIdempotentRMW(AtomicRMWInst *RMWI);
117 bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
118
119 bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
120 Value *PointerOperand, Value *ValueOperand,
121 Value *CASExpected, AtomicOrdering Ordering,
122 AtomicOrdering Ordering2,
123 ArrayRef<RTLIB::Libcall> Libcalls);
124 void expandAtomicLoadToLibcall(LoadInst *LI);
125 void expandAtomicStoreToLibcall(StoreInst *LI);
126 void expandAtomicRMWToLibcall(AtomicRMWInst *I);
127 void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
128
129 friend bool
130 llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
131 CreateCmpXchgInstFun CreateCmpXchg);
132
133 bool processAtomicInstr(Instruction *I);
134
135public:
136 bool run(Function &F,
137 const LibcallLoweringModuleAnalysisResult &LibcallResult,
138 const TargetMachine *TM);
139};
140
141class AtomicExpandLegacy : public FunctionPass {
142public:
143 static char ID; // Pass identification, replacement for typeid
144
145 AtomicExpandLegacy() : FunctionPass(ID) {}
146
147 void getAnalysisUsage(AnalysisUsage &AU) const override {
150 }
151
152 bool runOnFunction(Function &F) override;
153};
154
155// IRBuilder to be used for replacement atomic instructions.
156struct ReplacementIRBuilder
157 : IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> {
158 MDNode *MMRAMD = nullptr;
159
160 // Preserves the DebugLoc from I, and preserves still valid metadata.
161 // Enable StrictFP builder mode when appropriate.
162 explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
163 : IRBuilder(I->getContext(), InstSimplifyFolder(DL),
164 IRBuilderCallbackInserter(
165 [this](Instruction *I) { addMMRAMD(I); })) {
166 SetInsertPoint(I);
167 this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
168 if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
169 this->setIsFPConstrained(true);
170
171 MMRAMD = I->getMetadata(LLVMContext::MD_mmra);
172 }
173
174 void addMMRAMD(Instruction *I) {
175 if (canInstructionHaveMMRAs(*I))
176 I->setMetadata(LLVMContext::MD_mmra, MMRAMD);
177 }
178};
179
180} // end anonymous namespace
181
182char AtomicExpandLegacy::ID = 0;
183
184char &llvm::AtomicExpandID = AtomicExpandLegacy::ID;
185
187 "Expand Atomic instructions", false, false)
190INITIALIZE_PASS_END(AtomicExpandLegacy, DEBUG_TYPE,
191 "Expand Atomic instructions", false, false)
192
193// Helper functions to retrieve the size of atomic instructions.
194static unsigned getAtomicOpSize(LoadInst *LI) {
195 const DataLayout &DL = LI->getDataLayout();
196 return DL.getTypeStoreSize(LI->getType());
197}
198
199static unsigned getAtomicOpSize(StoreInst *SI) {
200 const DataLayout &DL = SI->getDataLayout();
201 return DL.getTypeStoreSize(SI->getValueOperand()->getType());
202}
203
204static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
205 const DataLayout &DL = RMWI->getDataLayout();
206 return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
207}
208
209static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
210 const DataLayout &DL = CASI->getDataLayout();
211 return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
212}
213
214/// Copy metadata that's safe to preserve when widening atomics.
215static void copyMetadataForAtomic(Instruction &Dest,
216 const Instruction &Source) {
217 SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
218 Source.getAllMetadata(MD);
219 LLVMContext &Ctx = Dest.getContext();
220 MDBuilder MDB(Ctx);
221
222 for (auto [ID, N] : MD) {
223 switch (ID) {
224 case LLVMContext::MD_dbg:
225 case LLVMContext::MD_tbaa:
226 case LLVMContext::MD_tbaa_struct:
227 case LLVMContext::MD_alias_scope:
228 case LLVMContext::MD_noalias:
229 case LLVMContext::MD_noalias_addrspace:
230 case LLVMContext::MD_access_group:
231 case LLVMContext::MD_mmra:
232 Dest.setMetadata(ID, N);
233 break;
234 default:
235 if (ID == Ctx.getMDKindID("amdgpu.no.remote.memory"))
236 Dest.setMetadata(ID, N);
237 else if (ID == Ctx.getMDKindID("amdgpu.no.fine.grained.memory"))
238 Dest.setMetadata(ID, N);
239
240 // Losing amdgpu.ignore.denormal.mode, but it doesn't matter for current
241 // uses.
242 break;
243 }
244 }
245}
246
247// Determine if a particular atomic operation has a supported size,
248// and is of appropriate alignment, to be passed through for target
249// lowering. (Versus turning into a __atomic libcall)
250template <typename Inst>
251static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
252 unsigned Size = getAtomicOpSize(I);
253 Align Alignment = I->getAlign();
254 return Alignment >= Size &&
255 Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
256}
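
// For instance (illustrative only; the limits come from the target), with a
// 64-bit maximum supported atomic width both of
//   %v = load atomic i64, ptr %p monotonic, align 4    ; under-aligned
//   %w = load atomic i128, ptr %q monotonic, align 16  ; over-sized
// fail this check and are lowered to __atomic_load libcalls instead.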
257
258bool AtomicExpandImpl::processAtomicInstr(Instruction *I) {
259 auto *LI = dyn_cast<LoadInst>(I);
260 auto *SI = dyn_cast<StoreInst>(I);
261 auto *RMWI = dyn_cast<AtomicRMWInst>(I);
262 auto *CASI = dyn_cast<AtomicCmpXchgInst>(I);
263
264 bool MadeChange = false;
265
266 // If the Size/Alignment is not supported, replace with a libcall.
267 if (LI) {
268 if (!LI->isAtomic())
269 return false;
270
271 if (!atomicSizeSupported(TLI, LI)) {
272 expandAtomicLoadToLibcall(LI);
273 return true;
274 }
275
276 if (TLI->shouldCastAtomicLoadInIR(LI) ==
277 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
278 I = LI = convertAtomicLoadToIntegerType(LI);
279 MadeChange = true;
280 }
281 } else if (SI) {
282 if (!SI->isAtomic())
283 return false;
284
285 if (!atomicSizeSupported(TLI, SI)) {
286 expandAtomicStoreToLibcall(SI);
287 return true;
288 }
289
290 if (TLI->shouldCastAtomicStoreInIR(SI) ==
291 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
292 I = SI = convertAtomicStoreToIntegerType(SI);
293 MadeChange = true;
294 }
295 } else if (RMWI) {
296 if (!atomicSizeSupported(TLI, RMWI)) {
297 expandAtomicRMWToLibcall(RMWI);
298 return true;
299 }
300
301 if (TLI->shouldCastAtomicRMWIInIR(RMWI) ==
302 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
303 I = RMWI = convertAtomicXchgToIntegerType(RMWI);
304 MadeChange = true;
305 }
306 } else if (CASI) {
307 if (!atomicSizeSupported(TLI, CASI)) {
308 expandAtomicCASToLibcall(CASI);
309 return true;
310 }
311
312 // TODO: when we're ready to make the change at the IR level, we can
313 // extend convertCmpXchgToInteger for floating point too.
314 if (CASI->getCompareOperand()->getType()->isPointerTy()) {
315 // TODO: add a TLI hook to control this so that each target can
316 // convert to lowering the original type one at a time.
317 I = CASI = convertCmpXchgToIntegerType(CASI);
318 MadeChange = true;
319 }
320 } else
321 return false;
322
323 if (TLI->shouldInsertFencesForAtomic(I)) {
324 auto FenceOrdering = AtomicOrdering::Monotonic;
325 if (LI && isAcquireOrStronger(LI->getOrdering())) {
326 FenceOrdering = LI->getOrdering();
327 LI->setOrdering(AtomicOrdering::Monotonic);
328 } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
329 FenceOrdering = SI->getOrdering();
330 SI->setOrdering(AtomicOrdering::Monotonic);
331 } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
332 isAcquireOrStronger(RMWI->getOrdering()))) {
333 FenceOrdering = RMWI->getOrdering();
334 RMWI->setOrdering(AtomicOrdering::Monotonic);
335 } else if (CASI &&
336 TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
337 TargetLoweringBase::AtomicExpansionKind::None &&
338 (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
339 isAcquireOrStronger(CASI->getSuccessOrdering()) ||
340 isAcquireOrStronger(CASI->getFailureOrdering()))) {
341 // If a compare and swap is lowered to LL/SC, we can do smarter fence
342 // insertion, with a stronger one on the success path than on the
343 // failure path. As a result, fence insertion is directly done by
344 // expandAtomicCmpXchg in that case.
345 FenceOrdering = CASI->getMergedOrdering();
346 auto CASOrdering = TLI->atomicOperationOrderAfterFenceSplit(CASI);
347
348 CASI->setSuccessOrdering(CASOrdering);
349 CASI->setFailureOrdering(CASOrdering);
350 }
351
352 if (FenceOrdering != AtomicOrdering::Monotonic) {
353 MadeChange |= bracketInstWithFences(I, FenceOrdering);
354 }
356 !(CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
357 TargetLoweringBase::AtomicExpansionKind::LLSC)) {
358 // CmpXchg LLSC is handled in expandAtomicCmpXchg().
359 IRBuilder Builder(I);
360 if (auto TrailingFence = TLI->emitTrailingFence(
361 Builder, I, AtomicOrdering::SequentiallyConsistent)) {
362 TrailingFence->moveAfter(I);
363 MadeChange = true;
364 }
365 }
366
367 if (LI)
368 MadeChange |= tryExpandAtomicLoad(LI);
369 else if (SI)
370 MadeChange |= tryExpandAtomicStore(SI);
371 else if (RMWI) {
372 // There are two different ways of expanding RMW instructions:
373 // - into a load if it is idempotent
374 // - into a Cmpxchg/LL-SC loop otherwise
375 // we try them in that order.
376
377 if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
378 MadeChange = true;
379
380 } else {
381 MadeChange |= tryExpandAtomicRMW(RMWI);
382 }
383 } else if (CASI)
384 MadeChange |= tryExpandAtomicCmpXchg(CASI);
385
386 return MadeChange;
387}
388
389bool AtomicExpandImpl::run(
390 Function &F, const LibcallLoweringModuleAnalysisResult &LibcallResult,
391 const TargetMachine *TM) {
392 const auto *Subtarget = TM->getSubtargetImpl(F);
393 if (!Subtarget->enableAtomicExpand())
394 return false;
395 TLI = Subtarget->getTargetLowering();
396 LibcallLowering = &LibcallResult.getLibcallLowering(*Subtarget);
397 DL = &F.getDataLayout();
398
399 bool MadeChange = false;
400
401 for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
402 BasicBlock *BB = &*BBI;
403
404 BasicBlock::reverse_iterator Next;
405
406 for (BasicBlock::reverse_iterator I = BB->rbegin(), E = BB->rend(); I != E;
407 I = Next) {
408 Instruction &Inst = *I;
409 Next = std::next(I);
410
411 if (processAtomicInstr(&Inst)) {
412 MadeChange = true;
413
414 // New blocks may have been inserted.
415 BBE = F.end();
416 }
417 }
418 }
419
420 return MadeChange;
421}
422
423bool AtomicExpandLegacy::runOnFunction(Function &F) {
424
425 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
426 if (!TPC)
427 return false;
428 auto *TM = &TPC->getTM<TargetMachine>();
429
430 const LibcallLoweringModuleAnalysisResult &LibcallResult =
431 getAnalysis<LibcallLoweringInfoWrapper>().getResult(*F.getParent());
432 AtomicExpandImpl AE;
433 return AE.run(F, LibcallResult, TM);
434}
435
436FunctionPass *llvm::createAtomicExpandLegacyPass() {
437 return new AtomicExpandLegacy();
438}
439
440PreservedAnalyses AtomicExpandPass::run(Function &F,
441 FunctionAnalysisManager &FAM) {
442 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
443
444 const LibcallLoweringModuleAnalysisResult *LibcallResult =
445 MAMProxy.getCachedResult<LibcallLoweringModuleAnalysis>(*F.getParent());
446
447 if (!LibcallResult) {
448 F.getContext().emitError("'" + LibcallLoweringModuleAnalysis::name() +
449 "' analysis required");
450 return PreservedAnalyses::all();
451 }
452
453 AtomicExpandImpl AE;
454
455 bool Changed = AE.run(F, *LibcallResult, TM);
456 if (!Changed)
457 return PreservedAnalyses::all();
458
459 return PreservedAnalyses::none();
460}
461
462bool AtomicExpandImpl::bracketInstWithFences(Instruction *I,
463 AtomicOrdering Order) {
464 ReplacementIRBuilder Builder(I, *DL);
465
466 auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
467
468 auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
469 // We have a guard here because not every atomic operation generates a
470 // trailing fence.
471 if (TrailingFence)
472 TrailingFence->moveAfter(I);
473
474 return (LeadingFence || TrailingFence);
475}
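
// For example (a sketch; only on targets whose shouldInsertFencesForAtomic
// returns true), a seq_cst RMW whose ordering was already relaxed to
// monotonic by processAtomicInstr ends up bracketed roughly as:
//   fence seq_cst
//   %old = atomicrmw add ptr %p, i32 1 monotonic
//   fence seq_cst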
476
477/// Get the iX type with the same bitwidth as T.
478IntegerType *
479AtomicExpandImpl::getCorrespondingIntegerType(Type *T, const DataLayout &DL) {
480 EVT VT = TLI->getMemValueType(DL, T);
481 unsigned BitWidth = VT.getStoreSizeInBits();
482 assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
483 return IntegerType::get(T->getContext(), BitWidth);
484}
485
486/// Convert an atomic load of a non-integral type to an integer load of the
487/// equivalent bitwidth. See the function comment on
488/// convertAtomicStoreToIntegerType for background.
489LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) {
490 auto *M = LI->getModule();
491 Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());
492
493 ReplacementIRBuilder Builder(LI, *DL);
494
495 Value *Addr = LI->getPointerOperand();
496
497 auto *NewLI = Builder.CreateLoad(NewTy, Addr);
498 NewLI->setAlignment(LI->getAlign());
499 NewLI->setVolatile(LI->isVolatile());
500 NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
501 LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
502
503 Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
504 LI->replaceAllUsesWith(NewVal);
505 LI->eraseFromParent();
506 return NewLI;
507}
508
509AtomicRMWInst *
510AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
512
513 auto *M = RMWI->getModule();
514 Type *NewTy =
515 getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());
516
517 ReplacementIRBuilder Builder(RMWI, *DL);
518
519 Value *Addr = RMWI->getPointerOperand();
520 Value *Val = RMWI->getValOperand();
521 Value *NewVal = Val->getType()->isPointerTy()
522 ? Builder.CreatePtrToInt(Val, NewTy)
523 : Builder.CreateBitCast(Val, NewTy);
524
525 auto *NewRMWI = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Addr, NewVal,
526 RMWI->getAlign(), RMWI->getOrdering(),
527 RMWI->getSyncScopeID());
528 NewRMWI->setVolatile(RMWI->isVolatile());
529 copyMetadataForAtomic(*NewRMWI, *RMWI);
530 LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
531
532 Value *NewRVal = RMWI->getType()->isPointerTy()
533 ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
534 : Builder.CreateBitCast(NewRMWI, RMWI->getType());
535 RMWI->replaceAllUsesWith(NewRVal);
536 RMWI->eraseFromParent();
537 return NewRMWI;
538}
539
540bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
541 switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
542 case TargetLoweringBase::AtomicExpansionKind::None:
543 return false;
544 case TargetLoweringBase::AtomicExpansionKind::LLSC:
545 expandAtomicOpToLLSC(
546 LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
547 LI->getOrdering(),
548 [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
549 return true;
550 case TargetLoweringBase::AtomicExpansionKind::LLOnly:
551 return expandAtomicLoadToLL(LI);
552 case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
553 return expandAtomicLoadToCmpXchg(LI);
554 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
555 LI->setAtomic(AtomicOrdering::NotAtomic);
556 return true;
557 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
558 TLI->emitExpandAtomicLoad(LI);
559 return true;
560 default:
561 llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
562 }
563}
564
565bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
566 switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
567 case TargetLoweringBase::AtomicExpansionKind::None:
568 return false;
569 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
570 TLI->emitExpandAtomicStore(SI);
571 return true;
572 case TargetLoweringBase::AtomicExpansionKind::Expand:
573 expandAtomicStoreToXChg(SI);
574 return true;
575 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
576 SI->setAtomic(AtomicOrdering::NotAtomic);
577 return true;
578 default:
579 llvm_unreachable("Unhandled case in tryExpandAtomicStore");
580 }
581}
582
583bool AtomicExpandImpl::expandAtomicLoadToLL(LoadInst *LI) {
584 ReplacementIRBuilder Builder(LI, *DL);
585
586 // On some architectures, load-linked instructions are atomic for larger
587 // sizes than normal loads. For example, the only 64-bit load guaranteed
588 // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
589 Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
590 LI->getPointerOperand(), LI->getOrdering());
591 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
592
593 LI->replaceAllUsesWith(Val);
594 LI->eraseFromParent();
595
596 return true;
597}
598
599bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
600 ReplacementIRBuilder Builder(LI, *DL);
601 AtomicOrdering Order = LI->getOrdering();
602 if (Order == AtomicOrdering::Unordered)
603 Order = AtomicOrdering::Monotonic;
604
605 Value *Addr = LI->getPointerOperand();
606 Type *Ty = LI->getType();
607 Constant *DummyVal = Constant::getNullValue(Ty);
608
609 Value *Pair = Builder.CreateAtomicCmpXchg(
610 Addr, DummyVal, DummyVal, LI->getAlign(), Order,
611 AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
612 Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
613
614 LI->replaceAllUsesWith(Loaded);
615 LI->eraseFromParent();
616
617 return true;
618}
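
// Sketch of the CmpXChg expansion above (orderings shown are illustrative):
//   %v = load atomic i32, ptr %p acquire, align 4
// becomes roughly
//   %pair = cmpxchg ptr %p, i32 0, i32 0 acquire acquire, align 4
//   %v = extractvalue { i32, i1 } %pair, 0
// i.e. a compare-exchange against a dummy value that never changes the
// observable contents of memory.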
619
620/// Convert an atomic store of a non-integral type to an integer store of the
621/// equivalent bitwidth. We used to not support floating point or vector
622/// atomics in the IR at all. The backends learned to deal with the bitcast
623/// idiom because that was the only way of expressing the notion of an atomic
624/// float or vector store. The long term plan is to teach each backend to
625/// instruction select from the original atomic store, but as a migration
626/// mechanism, we convert back to the old format which the backends understand.
627/// Each backend will need individual work to recognize the new format.
628StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
629 ReplacementIRBuilder Builder(SI, *DL);
630 auto *M = SI->getModule();
631 Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
632 M->getDataLayout());
633 Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
634
635 Value *Addr = SI->getPointerOperand();
636
637 StoreInst *NewSI = Builder.CreateStore(NewVal, Addr);
638 NewSI->setAlignment(SI->getAlign());
639 NewSI->setVolatile(SI->isVolatile());
640 NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
641 LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
642 SI->eraseFromParent();
643 return NewSI;
644}
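
// Sketch of the conversion above:
//   store atomic float %f, ptr %p seq_cst, align 4
// becomes
//   %bits = bitcast float %f to i32
//   store atomic i32 %bits, ptr %p seq_cst, align 4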
645
646void AtomicExpandImpl::expandAtomicStoreToXChg(StoreInst *SI) {
647 // This function is only called on atomic stores that are too large to be
648// atomic if implemented as a native store. So we replace them with an
649// atomic swap, which can be implemented for example as a ldrex/strex on ARM
650 // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
651 // It is the responsibility of the target to only signal expansion via
652 // shouldExpandAtomicRMW in cases where this is required and possible.
653 ReplacementIRBuilder Builder(SI, *DL);
654 AtomicOrdering Ordering = SI->getOrdering();
655 assert(Ordering != AtomicOrdering::NotAtomic);
656 AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
657 ? AtomicOrdering::Monotonic
658 : Ordering;
659 AtomicRMWInst *AI = Builder.CreateAtomicRMW(
660 AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
661 SI->getAlign(), RMWOrdering);
662 SI->eraseFromParent();
663
664 // Now we have an appropriate swap instruction, lower it as usual.
665 tryExpandAtomicRMW(AI);
666}
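
// Sketch: a store that is too wide to be a single native atomic store, e.g.
//   store atomic i64 %v, ptr %p seq_cst, align 8
// is first rewritten to
//   atomicrmw xchg ptr %p, i64 %v seq_cst, align 8   ; result unused
// and the xchg is then lowered by tryExpandAtomicRMW as usual.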
667
668static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
669 Value *Loaded, Value *NewVal, Align AddrAlign,
670 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
671 Value *&Success, Value *&NewLoaded,
672 Instruction *MetadataSrc) {
673 Type *OrigTy = NewVal->getType();
674
675 // This code can go away when cmpxchg supports FP and vector types.
676 assert(!OrigTy->isPointerTy());
677 bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isVectorTy();
678 if (NeedBitcast) {
679 IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
680 NewVal = Builder.CreateBitCast(NewVal, IntTy);
681 Loaded = Builder.CreateBitCast(Loaded, IntTy);
682 }
683
684 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
685 Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
686 AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
687 if (MetadataSrc)
688 copyMetadataForAtomic(*Pair, *MetadataSrc);
689
690 Success = Builder.CreateExtractValue(Pair, 1, "success");
691 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
692
693 if (NeedBitcast)
694 NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
695}
696
697bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
698 LLVMContext &Ctx = AI->getModule()->getContext();
699 TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
700 switch (Kind) {
701 case TargetLoweringBase::AtomicExpansionKind::None:
702 return false;
703 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
704 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
705 unsigned ValueSize = getAtomicOpSize(AI);
706 if (ValueSize < MinCASSize) {
707 expandPartwordAtomicRMW(AI,
708 TargetLoweringBase::AtomicExpansionKind::LLSC);
709 } else {
710 auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
711 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
712 AI->getValOperand());
713 };
714 expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
715 AI->getAlign(), AI->getOrdering(), PerformOp);
716 }
717 return true;
718 }
719 case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
720 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
721 unsigned ValueSize = getAtomicOpSize(AI);
722 if (ValueSize < MinCASSize) {
723 expandPartwordAtomicRMW(AI,
724 TargetLoweringBase::AtomicExpansionKind::CmpXChg);
725 } else {
726 SmallVector<StringRef> SSNs;
727 Ctx.getSyncScopeNames(SSNs);
728 auto MemScope = SSNs[AI->getSyncScopeID()].empty()
729 ? "system"
730 : SSNs[AI->getSyncScopeID()];
731 OptimizationRemarkEmitter ORE(AI->getFunction());
732 ORE.emit([&]() {
733 return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
734 << "A compare and swap loop was generated for an atomic "
735 << AI->getOperationName(AI->getOperation()) << " operation at "
736 << MemScope << " memory scope";
737 });
738 expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
739 }
740 return true;
741 }
742 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
743 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
744 unsigned ValueSize = getAtomicOpSize(AI);
745 if (ValueSize < MinCASSize) {
746 AtomicRMWInst::BinOp Op = AI->getOperation();
747 // Widen And/Or/Xor and give the target another chance at expanding it.
748 if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
749 Op == AtomicRMWInst::And) {
750 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
751 return true;
752 }
753 }
754 expandAtomicRMWToMaskedIntrinsic(AI);
755 return true;
756 }
757 case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
759 return true;
760 }
761 case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: {
763 return true;
764 }
765 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
766 return lowerAtomicRMWInst(AI);
767 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
768 TLI->emitExpandAtomicRMW(AI);
769 return true;
770 default:
771 llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
772 }
773}
774
775namespace {
776
777struct PartwordMaskValues {
778 // These three fields are guaranteed to be set by createMaskInstrs.
779 Type *WordType = nullptr;
780 Type *ValueType = nullptr;
781 Type *IntValueType = nullptr;
782 Value *AlignedAddr = nullptr;
783 Align AlignedAddrAlignment;
784 // The remaining fields can be null.
785 Value *ShiftAmt = nullptr;
786 Value *Mask = nullptr;
787 Value *Inv_Mask = nullptr;
788};
789
790[[maybe_unused]]
791raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
792 auto PrintObj = [&O](auto *V) {
793 if (V)
794 O << *V;
795 else
796 O << "nullptr";
797 O << '\n';
798 };
799 O << "PartwordMaskValues {\n";
800 O << " WordType: ";
801 PrintObj(PMV.WordType);
802 O << " ValueType: ";
803 PrintObj(PMV.ValueType);
804 O << " AlignedAddr: ";
805 PrintObj(PMV.AlignedAddr);
806 O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
807 O << " ShiftAmt: ";
808 PrintObj(PMV.ShiftAmt);
809 O << " Mask: ";
810 PrintObj(PMV.Mask);
811 O << " Inv_Mask: ";
812 PrintObj(PMV.Inv_Mask);
813 O << "}\n";
814 return O;
815}
816
817} // end anonymous namespace
818
819/// This is a helper function which builds instructions to provide
820/// values necessary for partword atomic operations. It takes an
821/// incoming address, Addr, and ValueType, and constructs the address,
822/// shift-amounts and masks needed to work with a larger value of size
823/// WordSize.
824///
825/// AlignedAddr: Addr rounded down to a multiple of WordSize
826///
827/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
828/// from AlignAddr for it to have the same value as if
829/// ValueType was loaded from Addr.
830///
831/// Mask: Value to mask with the value loaded from AlignAddr to
832/// include only the part that would've been loaded from Addr.
833///
834/// Inv_Mask: The inverse of Mask.
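///
/// A worked example (a sketch, assuming a little-endian target whose minimum
/// cmpxchg width is 32 bits): for an i8 at address A with A % 4 == 1,
///   AlignedAddr = A & ~3
///   ShiftAmt    = 8
///   Mask        = 0x0000FF00
///   Inv_Mask    = 0xFFFF00FF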
835static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
837 Value *Addr, Align AddrAlign,
838 unsigned MinWordSize) {
839 PartwordMaskValues PMV;
840
841 Module *M = I->getModule();
842 LLVMContext &Ctx = M->getContext();
843 const DataLayout &DL = M->getDataLayout();
844 unsigned ValueSize = DL.getTypeStoreSize(ValueType);
845
846 PMV.ValueType = PMV.IntValueType = ValueType;
847 if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy())
848 PMV.IntValueType =
849 Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());
850
851 PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
852 : ValueType;
853 if (PMV.ValueType == PMV.WordType) {
854 PMV.AlignedAddr = Addr;
855 PMV.AlignedAddrAlignment = AddrAlign;
856 PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
857 PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
858 return PMV;
859 }
860
861 PMV.AlignedAddrAlignment = Align(MinWordSize);
862
863 assert(ValueSize < MinWordSize);
864
865 PointerType *PtrTy = cast<PointerType>(Addr->getType());
866 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
867 Value *PtrLSB;
868
869 if (AddrAlign < MinWordSize) {
870 PMV.AlignedAddr = Builder.CreateIntrinsic(
871 Intrinsic::ptrmask, {PtrTy, IntTy},
872 {Addr, ConstantInt::getSigned(IntTy, ~(uint64_t)(MinWordSize - 1))},
873 nullptr, "AlignedAddr");
874
875 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
876 PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
877 } else {
878 // If the alignment is high enough, the low bits are known to be 0.
879 PMV.AlignedAddr = Addr;
880 PtrLSB = ConstantInt::getNullValue(IntTy);
881 }
882
883 if (DL.isLittleEndian()) {
884 // turn bytes into bits
885 PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
886 } else {
887 // turn bytes into bits, and count from the other side.
888 PMV.ShiftAmt = Builder.CreateShl(
889 Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
890 }
891
892 PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
893 PMV.Mask = Builder.CreateShl(
894 ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
895 "Mask");
896
897 PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
898
899 return PMV;
900}
901
902static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
903 const PartwordMaskValues &PMV) {
904 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
905 if (PMV.WordType == PMV.ValueType)
906 return WideWord;
907
908 Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
909 Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
910 return Builder.CreateBitCast(Trunc, PMV.ValueType);
911}
912
913static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
914 Value *Updated, const PartwordMaskValues &PMV) {
915 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
916 assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
917 if (PMV.WordType == PMV.ValueType)
918 return Updated;
919
920 Updated = Builder.CreateBitCast(Updated, PMV.IntValueType);
921
922 Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
923 Value *Shift =
924 Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
925 Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
926 Value *Or = Builder.CreateOr(And, Shift, "inserted");
927 return Or;
928}
929
930/// Emit IR to implement a masked version of a given atomicrmw
931/// operation. (That is, only the bits under the Mask should be
932/// affected by the operation)
934 IRBuilderBase &Builder, Value *Loaded,
935 Value *Shifted_Inc, Value *Inc,
936 const PartwordMaskValues &PMV) {
937 // TODO: update to use
938 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
939 // to merge bits from two values without requiring PMV.Inv_Mask.
940 switch (Op) {
941 case AtomicRMWInst::Xchg: {
942 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
943 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
944 return FinalVal;
945 }
946 case AtomicRMWInst::Or:
947 case AtomicRMWInst::Xor:
948 case AtomicRMWInst::And:
949 llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
950 case AtomicRMWInst::Add:
951 case AtomicRMWInst::Sub:
952 case AtomicRMWInst::Nand: {
953 // The other arithmetic ops need to be masked into place.
954 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
955 Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
956 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
957 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
958 return FinalVal;
959 }
974 // Finally, other ops will operate on the full value, so truncate down to
975 // the original size, and expand out again after doing the
976 // operation. Bitcasts will be inserted for FP values.
977 Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
978 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
979 Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
980 return FinalVal;
981 }
982 default:
983 llvm_unreachable("Unknown atomic op");
984 }
985}
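
// E.g. (a sketch) for a partword 'add', the Add/Sub/Nand path above produces:
//   %sum    = add i32 %loaded, %shifted_inc  ; carries only spill upward
//   %masked = and i32 %sum, %Mask            ; keep just our lane
//   %rest   = and i32 %loaded, %Inv_Mask     ; untouched neighbouring bytes
//   %final  = or i32 %rest, %masked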
986
987/// Expand a sub-word atomicrmw operation into an appropriate
988/// word-sized operation.
989///
990/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
991/// way as a typical atomicrmw expansion. The only difference here is
992/// that the operation inside of the loop may operate upon only a
993/// part of the value.
994void AtomicExpandImpl::expandPartwordAtomicRMW(
995 AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
996 // Widen And/Or/Xor and give the target another chance at expanding it.
997 AtomicRMWInst::BinOp Op = AI->getOperation();
998 if (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
999 Op == AtomicRMWInst::And) {
1000 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
1001 return;
1002 }
1003 AtomicOrdering MemOpOrder = AI->getOrdering();
1004 SyncScope::ID SSID = AI->getSyncScopeID();
1005
1006 ReplacementIRBuilder Builder(AI, *DL);
1007
1008 PartwordMaskValues PMV =
1009 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1010 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1011
1012 Value *ValOperand_Shifted = nullptr;
1013 if (Op == AtomicRMWInst::Xchg || Op == AtomicRMWInst::Add ||
1014 Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Nand) {
1015 Value *ValOp = Builder.CreateBitCast(AI->getValOperand(), PMV.IntValueType);
1016 ValOperand_Shifted =
1017 Builder.CreateShl(Builder.CreateZExt(ValOp, PMV.WordType), PMV.ShiftAmt,
1018 "ValOperand_Shifted");
1019 }
1020
1021 auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
1022 return performMaskedAtomicOp(Op, Builder, Loaded, ValOperand_Shifted,
1023 AI->getValOperand(), PMV);
1024 };
1025
1026 Value *OldResult;
1027 if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
1028 OldResult = insertRMWCmpXchgLoop(
1029 Builder, PMV.WordType, PMV.AlignedAddr, PMV.AlignedAddrAlignment,
1030 MemOpOrder, SSID, PerformPartwordOp, createCmpXchgInstFun, AI);
1031 } else {
1032 assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
1033 OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
1034 PMV.AlignedAddrAlignment, MemOpOrder,
1035 PerformPartwordOp);
1036 }
1037
1038 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1039 AI->replaceAllUsesWith(FinalOldResult);
1040 AI->eraseFromParent();
1041}
1042
1043// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
1044AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
1045 ReplacementIRBuilder Builder(AI, *DL);
1046 AtomicRMWInst::BinOp Op = AI->getOperation();
1047
1048 assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
1049 Op == AtomicRMWInst::And) &&
1050 "Unable to widen operation");
1051
1052 PartwordMaskValues PMV =
1053 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1054 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1055
1056 Value *ValOperand_Shifted =
1057 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
1058 PMV.ShiftAmt, "ValOperand_Shifted");
1059
1060 Value *NewOperand;
1061
1062 if (Op == AtomicRMWInst::And)
1063 NewOperand =
1064 Builder.CreateOr(ValOperand_Shifted, PMV.Inv_Mask, "AndOperand");
1065 else
1066 NewOperand = ValOperand_Shifted;
1067
1068 AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
1069 Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
1070 AI->getOrdering(), AI->getSyncScopeID());
1071
1072 copyMetadataForAtomic(*NewAI, *AI);
1073
1074 Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
1075 AI->replaceAllUsesWith(FinalOldResult);
1076 AI->eraseFromParent();
1077 return NewAI;
1078}
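
// Sketch of the widening above for an i8 'and' on a 32-bit-cmpxchg target:
//   %old = atomicrmw and ptr %p, i8 %v monotonic
// becomes roughly
//   %wide = or i32 %ValOperand_Shifted, %Inv_Mask  ; all-ones outside our byte
//   %oldw = atomicrmw and ptr %AlignedAddr, i32 %wide monotonic, align 4
// with the original i8 result recovered via extractMaskedValue.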
1079
1080bool AtomicExpandImpl::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
1081 // The basic idea here is that we're expanding a cmpxchg of a
1082 // smaller memory size up to a word-sized cmpxchg. To do this, we
1083 // need to add a retry-loop for strong cmpxchg, so that
1084 // modifications to other parts of the word don't cause a spurious
1085 // failure.
1086
1087 // This generates code like the following:
1088 // [[Setup mask values PMV.*]]
1089 // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
1090 // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
1091 // %InitLoaded = load i32* %addr
1092 // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
1093 // br partword.cmpxchg.loop
1094 // partword.cmpxchg.loop:
1095 // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
1096 // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
1097 // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
1098 // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
1099 // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
1100 // i32 %FullWord_NewVal success_ordering failure_ordering
1101 // %OldVal = extractvalue { i32, i1 } %NewCI, 0
1102 // %Success = extractvalue { i32, i1 } %NewCI, 1
1103 // br i1 %Success, label %partword.cmpxchg.end,
1104 // label %partword.cmpxchg.failure
1105 // partword.cmpxchg.failure:
1106 // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
1107 // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
1108 // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
1109 // label %partword.cmpxchg.end
1110 // partword.cmpxchg.end:
1111 // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
1112 // %FinalOldVal = trunc i32 %tmp1 to i8
1113 // %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
1114 // %Res = insertvalue { i8, i1 } %25, i1 %Success, 1
1115
1116 Value *Addr = CI->getPointerOperand();
1117 Value *Cmp = CI->getCompareOperand();
1118 Value *NewVal = CI->getNewValOperand();
1119
1120 BasicBlock *BB = CI->getParent();
1121 Function *F = BB->getParent();
1122 ReplacementIRBuilder Builder(CI, *DL);
1123 LLVMContext &Ctx = Builder.getContext();
1124
1125 BasicBlock *EndBB =
1126 BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
1127 auto FailureBB =
1128 BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
1129 auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
1130
1131 // The split call above "helpfully" added a branch at the end of BB
1132 // (to the wrong place).
1133 std::prev(BB->end())->eraseFromParent();
1134 Builder.SetInsertPoint(BB);
1135
1136 PartwordMaskValues PMV =
1137 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1138 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1139
1140 // Shift the incoming values over, into the right location in the word.
1141 Value *NewVal_Shifted =
1142 Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
1143 Value *Cmp_Shifted =
1144 Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
1145
1146 // Load the entire current word, and mask into place the expected and new
1147 // values
1148 LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
1149 InitLoaded->setVolatile(CI->isVolatile());
1150 Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
1151 Builder.CreateBr(LoopBB);
1152
1153 // partword.cmpxchg.loop:
1154 Builder.SetInsertPoint(LoopBB);
1155 PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
1156 Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
1157
1158 // Mask/Or the expected and new values into place in the loaded word.
1159 Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
1160 Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
1161 AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
1162 PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
1163 CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID());
1164 NewCI->setVolatile(CI->isVolatile());
1165 // When we're building a strong cmpxchg, we need a loop, so you
1166 // might think we could use a weak cmpxchg inside. But using a strong
1167 // one allows the comparison below for ShouldContinue, and we expect
1168 // the underlying cmpxchg to be a machine instruction, which is
1169 // strong anyway.
1170 NewCI->setWeak(CI->isWeak());
1171
1172 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1173 Value *Success = Builder.CreateExtractValue(NewCI, 1);
1174
1175 if (CI->isWeak())
1176 Builder.CreateBr(EndBB);
1177 else
1178 Builder.CreateCondBr(Success, EndBB, FailureBB);
1179
1180 // partword.cmpxchg.failure:
1181 Builder.SetInsertPoint(FailureBB);
1182 // Upon failure, check whether the masked-out part of the loaded value
1183 // was modified. If it wasn't, abort the cmpxchg, since the masked-in
1184 // part must have been.
1185 Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
1186 Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
1187 Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
1188
1189 // Add the second value to the phi from above
1190 Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
1191
1192 // partword.cmpxchg.end:
1193 Builder.SetInsertPoint(CI);
1194
1195 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1196 Value *Res = PoisonValue::get(CI->getType());
1197 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1198 Res = Builder.CreateInsertValue(Res, Success, 1);
1199
1200 CI->replaceAllUsesWith(Res);
1201 CI->eraseFromParent();
1202 return true;
1203}
1204
1205void AtomicExpandImpl::expandAtomicOpToLLSC(
1206 Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
1207 AtomicOrdering MemOpOrder,
1208 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1209 ReplacementIRBuilder Builder(I, *DL);
1210 Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
1211 MemOpOrder, PerformOp);
1212
1213 I->replaceAllUsesWith(Loaded);
1214 I->eraseFromParent();
1215}
1216
1217void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
1218 ReplacementIRBuilder Builder(AI, *DL);
1219
1220 PartwordMaskValues PMV =
1221 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1222 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1223
1224 // The value operand must be sign-extended for signed min/max so that the
1225 // target's signed comparison instructions can be used. Otherwise, just
1226 // zero-ext.
1227 Instruction::CastOps CastOp = Instruction::ZExt;
1228 AtomicRMWInst::BinOp RMWOp = AI->getOperation();
1229 if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
1230 CastOp = Instruction::SExt;
1231
1232 Value *ValOperand_Shifted = Builder.CreateShl(
1233 Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
1234 PMV.ShiftAmt, "ValOperand_Shifted");
1235 Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
1236 Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
1237 AI->getOrdering());
1238 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1239 AI->replaceAllUsesWith(FinalOldResult);
1240 AI->eraseFromParent();
1241}
1242
1243void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic(
1244 AtomicCmpXchgInst *CI) {
1245 ReplacementIRBuilder Builder(CI, *DL);
1246
1247 PartwordMaskValues PMV = createMaskInstrs(
1248 Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
1249 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1250
1251 Value *CmpVal_Shifted = Builder.CreateShl(
1252 Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
1253 "CmpVal_Shifted");
1254 Value *NewVal_Shifted = Builder.CreateShl(
1255 Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
1256 "NewVal_Shifted");
1257 Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
1258 Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
1259 CI->getMergedOrdering());
1260 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1261 Value *Res = PoisonValue::get(CI->getType());
1262 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1263 Value *Success = Builder.CreateICmpEQ(
1264 CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
1265 Res = Builder.CreateInsertValue(Res, Success, 1);
1266
1267 CI->replaceAllUsesWith(Res);
1268 CI->eraseFromParent();
1269}
1270
1271Value *AtomicExpandImpl::insertRMWLLSCLoop(
1272 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1273 AtomicOrdering MemOpOrder,
1274 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1275 LLVMContext &Ctx = Builder.getContext();
1276 BasicBlock *BB = Builder.GetInsertBlock();
1277 Function *F = BB->getParent();
1278
1279 assert(AddrAlign >= F->getDataLayout().getTypeStoreSize(ResultTy) &&
1280 "Expected at least natural alignment at this point.");
1281
1282 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1283 //
1284 // The standard expansion we produce is:
1285 // [...]
1286 // atomicrmw.start:
1287 // %loaded = @load.linked(%addr)
1288 // %new = some_op iN %loaded, %incr
1289 // %stored = @store_conditional(%new, %addr)
1290 // %try_again = icmp i32 ne %stored, 0
1291 // br i1 %try_again, label %loop, label %atomicrmw.end
1292 // atomicrmw.end:
1293 // [...]
1294 BasicBlock *ExitBB =
1295 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1296 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1297
1298 // The split call above "helpfully" added a branch at the end of BB (to the
1299 // wrong place).
1300 std::prev(BB->end())->eraseFromParent();
1301 Builder.SetInsertPoint(BB);
1302 Builder.CreateBr(LoopBB);
1303
1304 // Start the main loop block now that we've taken care of the preliminaries.
1305 Builder.SetInsertPoint(LoopBB);
1306 Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);
1307
1308 Value *NewVal = PerformOp(Builder, Loaded);
1309
1310 Value *StoreSuccess =
1311 TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
1312 Value *TryAgain = Builder.CreateICmpNE(
1313 StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
1314
1315 Instruction *CondBr = Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
1316
1317 // Atomic RMW expands to a load-linked / store-conditional loop. Because it
1318 // is hard to predict precise branch weights, we mark the branch as "unknown"
1319 // (50/50) to prevent misleading optimizations.
1321
1322 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1323 return Loaded;
1324}
1325
1326/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
1327/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
1328/// IR. As a migration step, we convert back to what used to be the standard
1329/// way to represent a pointer cmpxchg so that we can update backends one by
1330/// one.
1331AtomicCmpXchgInst *
1332AtomicExpandImpl::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1333 auto *M = CI->getModule();
1334 Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
1335 M->getDataLayout());
1336
1337 ReplacementIRBuilder Builder(CI, *DL);
1338
1339 Value *Addr = CI->getPointerOperand();
1340
1341 Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
1342 Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
1343
1344 auto *NewCI = Builder.CreateAtomicCmpXchg(
1345 Addr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
1346 CI->getFailureOrdering(), CI->getSyncScopeID());
1347 NewCI->setVolatile(CI->isVolatile());
1348 NewCI->setWeak(CI->isWeak());
1349 LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1350
1351 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1352 Value *Succ = Builder.CreateExtractValue(NewCI, 1);
1353
1354 OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
1355
1356 Value *Res = PoisonValue::get(CI->getType());
1357 Res = Builder.CreateInsertValue(Res, OldVal, 0);
1358 Res = Builder.CreateInsertValue(Res, Succ, 1);
1359
1360 CI->replaceAllUsesWith(Res);
1361 CI->eraseFromParent();
1362 return NewCI;
1363}
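
// Sketch of the conversion above (pointer width assumed to be 64 bits):
//   %r = cmpxchg ptr %p, ptr %old, ptr %new acq_rel monotonic
// becomes roughly
//   %oldi = ptrtoint ptr %old to i64
//   %newi = ptrtoint ptr %new to i64
//   %ri = cmpxchg ptr %p, i64 %oldi, i64 %newi acq_rel monotonic
// with the loaded value converted back via inttoptr and the { ptr, i1 }
// result struct rebuilt for the original users.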
1364
1365bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1366 AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
1367 AtomicOrdering FailureOrder = CI->getFailureOrdering();
1368 Value *Addr = CI->getPointerOperand();
1369 BasicBlock *BB = CI->getParent();
1370 Function *F = BB->getParent();
1371 LLVMContext &Ctx = F->getContext();
1372 // If shouldInsertFencesForAtomic() returns true, then the target does not
1373 // want to deal with memory orders, and emitLeading/TrailingFence should take
1374 // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
1375 // should preserve the ordering.
1376 bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
1377 AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
1378 ? AtomicOrdering::Monotonic
1379 : CI->getMergedOrdering();
1380
1381 // In implementations which use a barrier to achieve release semantics, we can
1382 // delay emitting this barrier until we know a store is actually going to be
1383 // attempted. The cost of this delay is that we need 2 copies of the block
1384 // emitting the load-linked, affecting code size.
1385 //
1386 // Ideally, this logic would be unconditional except for the minsize check
1387 // since in other cases the extra blocks naturally collapse down to the
1388 // minimal loop. Unfortunately, this puts too much stress on later
1389 // optimisations so we avoid emitting the extra logic in those cases too.
1390 bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
1391 SuccessOrder != AtomicOrdering::Monotonic &&
1392 SuccessOrder != AtomicOrdering::Acquire &&
1393 !F->hasMinSize();
1394
1395 // There's no overhead for sinking the release barrier in a weak cmpxchg, so
1396 // do it even on minsize.
1397 bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();
1398
1399 // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
1400 //
1401 // The full expansion we produce is:
1402 // [...]
1403 // %aligned.addr = ...
1404 // cmpxchg.start:
1405 // %unreleasedload = @load.linked(%aligned.addr)
1406 // %unreleasedload.extract = extract value from %unreleasedload
1407 // %should_store = icmp eq %unreleasedload.extract, %desired
1408 // br i1 %should_store, label %cmpxchg.releasingstore,
1409 // label %cmpxchg.nostore
1410 // cmpxchg.releasingstore:
1411 // fence?
1412 // br label cmpxchg.trystore
1413 // cmpxchg.trystore:
1414 // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
1415 // [%releasedload, %cmpxchg.releasedload]
1416 // %updated.new = insert %new into %loaded.trystore
1417 // %stored = @store_conditional(%updated.new, %aligned.addr)
1418 // %success = icmp eq i32 %stored, 0
1419 // br i1 %success, label %cmpxchg.success,
1420 // label %cmpxchg.releasedload/%cmpxchg.failure
1421 // cmpxchg.releasedload:
1422 // %releasedload = @load.linked(%aligned.addr)
1423 // %releasedload.extract = extract value from %releasedload
1424 // %should_store = icmp eq %releasedload.extract, %desired
1425 // br i1 %should_store, label %cmpxchg.trystore,
1426 // label %cmpxchg.failure
1427 // cmpxchg.success:
1428 // fence?
1429 // br label %cmpxchg.end
1430 // cmpxchg.nostore:
1431 // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
1432 // [%releasedload,
1433 // %cmpxchg.releasedload/%cmpxchg.trystore]
1434 // @load_linked_fail_balance()?
1435 // br label %cmpxchg.failure
1436 // cmpxchg.failure:
1437 // fence?
1438 // br label %cmpxchg.end
1439 // cmpxchg.end:
1440 // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
1441 // [%loaded.trystore, %cmpxchg.trystore]
1442 // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
1443 // %loaded = extract value from %loaded.exit
1444 // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
1445 // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
1446 // [...]
1447 BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
1448 auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
1449 auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
1450 auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
1451 auto ReleasedLoadBB =
1452 BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
1453 auto TryStoreBB =
1454 BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
1455 auto ReleasingStoreBB =
1456 BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
1457 auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
1458
1459 ReplacementIRBuilder Builder(CI, *DL);
1460
1461 // The split call above "helpfully" added a branch at the end of BB (to the
1462 // wrong place), but we might want a fence too. It's easiest to just remove
1463 // the branch entirely.
1464 std::prev(BB->end())->eraseFromParent();
1465 Builder.SetInsertPoint(BB);
1466 if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1467 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1468
1469 PartwordMaskValues PMV =
1470 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1471 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1472 Builder.CreateBr(StartBB);
1473
1474 // Start the main loop block now that we've taken care of the preliminaries.
1475 Builder.SetInsertPoint(StartBB);
1476 Value *UnreleasedLoad =
1477 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1478 Value *UnreleasedLoadExtract =
1479 extractMaskedValue(Builder, UnreleasedLoad, PMV);
1480 Value *ShouldStore = Builder.CreateICmpEQ(
1481 UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");
1482
1483 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1484 // jump straight past that fence instruction (if it exists).
1485 Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB,
1486 MDBuilder(F->getContext()).createLikelyBranchWeights());
1487
1488 Builder.SetInsertPoint(ReleasingStoreBB);
1489 if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1490 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1491 Builder.CreateBr(TryStoreBB);
1492
1493 Builder.SetInsertPoint(TryStoreBB);
1494 PHINode *LoadedTryStore =
1495 Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
1496 LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
1497 Value *NewValueInsert =
1498 insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
1499 Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
1500 PMV.AlignedAddr, MemOpOrder);
1501 StoreSuccess = Builder.CreateICmpEQ(
1502 StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
1503 BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1504 Builder.CreateCondBr(StoreSuccess, SuccessBB,
1505 CI->isWeak() ? FailureBB : RetryBB,
1506 MDBuilder(F->getContext()).createLikelyBranchWeights());
1507
1508 Builder.SetInsertPoint(ReleasedLoadBB);
1509 Value *SecondLoad;
1510 if (HasReleasedLoadBB) {
1511 SecondLoad =
1512 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1513 Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
1514 ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
1515 CI->getCompareOperand(), "should_store");
1516
1517 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1518 // jump straight past that fence instruction (if it exists).
1519 Builder.CreateCondBr(
1520 ShouldStore, TryStoreBB, NoStoreBB,
1521 MDBuilder(F->getContext()).createLikelyBranchWeights());
1522 // Update PHI node in TryStoreBB.
1523 LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
1524 } else
1525 Builder.CreateUnreachable();
1526
1527 // Make sure later instructions don't get reordered with a fence if
1528 // necessary.
1529 Builder.SetInsertPoint(SuccessBB);
1530 if (ShouldInsertFencesForAtomic ||
1532 TLI->emitTrailingFence(Builder, CI, SuccessOrder);
1533 Builder.CreateBr(ExitBB);
1534
1535 Builder.SetInsertPoint(NoStoreBB);
1536 PHINode *LoadedNoStore =
1537 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
1538 LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
1539 if (HasReleasedLoadBB)
1540 LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);
1541
1542 // In the failing case, where we don't execute the store-conditional, the
1543 // target might want to balance out the load-linked with a dedicated
1544 // instruction (e.g., on ARM, clearing the exclusive monitor).
1545 TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
1546 Builder.CreateBr(FailureBB);
1547
1548 Builder.SetInsertPoint(FailureBB);
1549 PHINode *LoadedFailure =
1550 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
1551 LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
1552 if (CI->isWeak())
1553 LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
1554 if (ShouldInsertFencesForAtomic)
1555 TLI->emitTrailingFence(Builder, CI, FailureOrder);
1556 Builder.CreateBr(ExitBB);
1557
1558 // Finally, we have control-flow based knowledge of whether the cmpxchg
1559 // succeeded or not. We expose this to later passes by converting any
1560 // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
1561 // PHI.
1562 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1563 PHINode *LoadedExit =
1564 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
1565 LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
1566 LoadedExit->addIncoming(LoadedFailure, FailureBB);
1567 PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
1568 Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
1569 Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
1570
1571 // This is the "exit value" from the cmpxchg expansion. It may be of
1572 // a type wider than the one in the cmpxchg instruction.
1573 Value *LoadedFull = LoadedExit;
1574
1575 Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
1576 Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);
1577
1578 // Look for any users of the cmpxchg that are just comparing the loaded value
1579 // against the desired one, and replace them with the CFG-derived version.
1580 SmallVector<ExtractValueInst *, 2> PrunedInsts;
1581 for (auto *User : CI->users()) {
1582 ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
1583 if (!EV)
1584 continue;
1585
1586 assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
1587 "weird extraction from { iN, i1 }");
1588
1589 if (EV->getIndices()[0] == 0)
1590 EV->replaceAllUsesWith(Loaded);
1591 else
1592 EV->replaceAllUsesWith(Success);
1593
1594 PrunedInsts.push_back(EV);
1595 }
1596
1597 // We can remove the instructions now we're no longer iterating through them.
1598 for (auto *EV : PrunedInsts)
1599 EV->eraseFromParent();
1600
1601 if (!CI->use_empty()) {
1602 // Some use of the full struct return that we don't understand has happened,
1603 // so we've got to reconstruct it properly.
1604 Value *Res;
1605 Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0);
1606 Res = Builder.CreateInsertValue(Res, Success, 1);
1607
1608 CI->replaceAllUsesWith(Res);
1609 }
1610
1611 CI->eraseFromParent();
1612 return true;
1613}
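// Illustrative sketch (not part of the original file): for a word-sized,
// strong cmpxchg, the LL/SC expansion built above produces control flow of
// roughly this shape, in terms of the block variables used in this function:
//
//   StartBB:           load-linked, compare against the expected value;
//                      ShouldStore ? ReleasingStoreBB : NoStoreBB
//   ReleasingStoreBB:  optional leading fence, fall through to TryStoreBB
//   TryStoreBB:        store-conditional of the new value;
//                      StoreSuccess ? SuccessBB
//                                   : (weak ? FailureBB : ReleasedLoadBB/StartBB)
//   ReleasedLoadBB:    re-load for the strong case, loop back to TryStoreBB
//   NoStoreBB -> FailureBB:  balance the load-linked, trailing failure fence
//   SuccessBB/FailureBB -> ExitBB:  PHIs rebuild the { iN, i1 } result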
1614
1615bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
1616 // TODO: Add floating point support.
1617 auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
1618 if (!C)
1619 return false;
1620
1621 switch (RMWI->getOperation()) {
1622 case AtomicRMWInst::Add:
1623 case AtomicRMWInst::Sub:
1624 case AtomicRMWInst::Or:
1625 case AtomicRMWInst::Xor:
1626 return C->isZero();
1627 case AtomicRMWInst::And:
1628 return C->isMinusOne();
1629 case AtomicRMWInst::Min:
1630 return C->isMaxValue(true);
1631 case AtomicRMWInst::Max:
1632 return C->isMinValue(true);
1633 case AtomicRMWInst::UMin:
1634 return C->isMaxValue(false);
1635 case AtomicRMWInst::UMax:
1636 return C->isMinValue(false);
1637 default:
1638 return false;
1639 }
1640}
1641
1642bool AtomicExpandImpl::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
1643 if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1644 tryExpandAtomicLoad(ResultingLoad);
1645 return true;
1646 }
1647 return false;
1648}
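// Illustrative example (not from the original file): an "idempotent" RMW is
// one whose operand cannot change the stored value, e.g.
//
//   %old = atomicrmw or ptr %p, i32 0 seq_cst
//
// Such an operation only needs the loaded value plus the ordering guarantees,
// so a target that implements lowerIdempotentRMWIntoFencedLoad() may turn it
// into an atomic load surrounded by the appropriate fences; the resulting
// load is then fed back through tryExpandAtomicLoad() above.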
1649
1650Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
1651 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1652 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
1653 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
1654 CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc) {
1655 LLVMContext &Ctx = Builder.getContext();
1656 BasicBlock *BB = Builder.GetInsertBlock();
1657 Function *F = BB->getParent();
1658
1659 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1660 //
1661 // The standard expansion we produce is:
1662 // [...]
1663 // %init_loaded = load atomic iN* %addr
1664 // br label %loop
1665 // loop:
1666 // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1667 // %new = some_op iN %loaded, %incr
1668 // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1669 // %new_loaded = extractvalue { iN, i1 } %pair, 0
1670 // %success = extractvalue { iN, i1 } %pair, 1
1671 // br i1 %success, label %atomicrmw.end, label %loop
1672 // atomicrmw.end:
1673 // [...]
1674 BasicBlock *ExitBB =
1675 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1676 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1677
1678 // The split call above "helpfully" added a branch at the end of BB (to the
1679 // wrong place), but we want a load. It's easiest to just remove
1680 // the branch entirely.
1681 std::prev(BB->end())->eraseFromParent();
1682 Builder.SetInsertPoint(BB);
1683 LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
1684 Builder.CreateBr(LoopBB);
1685
1686 // Start the main loop block now that we've taken care of the preliminaries.
1687 Builder.SetInsertPoint(LoopBB);
1688 PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
1689 Loaded->addIncoming(InitLoaded, BB);
1690
1691 Value *NewVal = PerformOp(Builder, Loaded);
1692
1693 Value *NewLoaded = nullptr;
1694 Value *Success = nullptr;
1695
1696 CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
1697 MemOpOrder == AtomicOrdering::Unordered
1698 ? AtomicOrdering::Monotonic
1699 : MemOpOrder,
1700 SSID, Success, NewLoaded, MetadataSrc);
1701 assert(Success && NewLoaded);
1702
1703 Loaded->addIncoming(NewLoaded, LoopBB);
1704
1705 Instruction *CondBr = Builder.CreateCondBr(Success, ExitBB, LoopBB);
1706
1707 // Atomic RMW expands to a cmpxchg loop. Since precise branch weights
1708 // cannot be easily determined here, we mark the branch as "unknown" (50/50)
1709 // to prevent misleading optimizations.
1710 setExplicitlyUnknownBranchWeightsIfProfiled(*CondBr, DEBUG_TYPE, F);
1711
1712 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1713 return NewLoaded;
1714}
1715
1716bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1717 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1718 unsigned ValueSize = getAtomicOpSize(CI);
1719
1720 switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1721 default:
1722 llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
1723 case TargetLoweringBase::AtomicExpansionKind::None:
1724 if (ValueSize < MinCASSize)
1725 return expandPartwordCmpXchg(CI);
1726 return false;
1727 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
1728 return expandAtomicCmpXchg(CI);
1729 }
1730 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
1731 expandAtomicCmpXchgToMaskedIntrinsic(CI);
1732 return true;
1733 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
1734 return lowerAtomicCmpXchgInst(CI);
1735 case TargetLoweringBase::AtomicExpansionKind::CustomExpand: {
1736 TLI->emitExpandAtomicCmpXchg(CI);
1737 return true;
1738 }
1739 }
1740}
1741
1742// Note: This function is exposed externally by AtomicExpandUtils.h
1743 bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
1744 CreateCmpXchgInstFun CreateCmpXchg) {
1745 ReplacementIRBuilder Builder(AI, AI->getDataLayout());
1746 Builder.setIsFPConstrained(
1747 AI->getFunction()->hasFnAttribute(Attribute::StrictFP));
1748
1749 // FIXME: If FP exceptions are observable, we should force them off for the
1750 // loop for the FP atomics.
1751 Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
1752 Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
1753 AI->getOrdering(), AI->getSyncScopeID(),
1754 [&](IRBuilderBase &Builder, Value *Loaded) {
1755 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
1756 AI->getValOperand());
1757 },
1758 CreateCmpXchg, /*MetadataSrc=*/AI);
1759
1760 AI->replaceAllUsesWith(Loaded);
1761 AI->eraseFromParent();
1762 return true;
1763}
1764
1765// In order to use one of the sized library calls such as
1766// __atomic_fetch_add_4, the alignment must be sufficient, the size
1767// must be one of the potentially-specialized sizes, and the value
1768// type must actually exist in C on the target (otherwise, the
1769// function wouldn't actually be defined.)
1770static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
1771 const DataLayout &DL) {
1772 // TODO: "LargestSize" is an approximation for "largest type that
1773 // you can express in C". It seems to be the case that int128 is
1774 // supported on all 64-bit platforms, otherwise only up to 64-bit
1775 // integers are supported. If we get this wrong, then we'll try to
1776 // call a sized libcall that doesn't actually exist. There should
1777 // really be some more reliable way in LLVM of determining integer
1778 // sizes which are valid in the target's C ABI...
1779 unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1780 return Alignment >= Size &&
1781 (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1782 Size <= LargestSize;
1783}
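// Illustrative examples of the rule above (assumed typical 64-bit target
// where i128 is legal in C; not from the original file):
//   canUseSizedAtomicCall(/*Size=*/8,  Align(8),  DL) -> true  (__atomic_*_8)
//   canUseSizedAtomicCall(/*Size=*/4,  Align(2),  DL) -> false (underaligned)
//   canUseSizedAtomicCall(/*Size=*/12, Align(16), DL) -> false (12 is not a
//                                                        specialized size)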
1784
1785void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) {
1786 static const RTLIB::Libcall Libcalls[6] = {
1787 RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1788 RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1789 unsigned Size = getAtomicOpSize(I);
1790
1791 bool expanded = expandAtomicOpToLibcall(
1792 I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
1793 I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1794 if (!expanded)
1795 handleFailure(*I, "unsupported atomic load");
1796}
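// Illustrative example (not from the original file): for a load the target
// cannot handle natively, e.g.
//   %v = load atomic i32, ptr %p seq_cst, align 4
// the sized-libcall path above emits roughly
//   %v = call i32 @__atomic_load_4(ptr %p, i32 5)
// where 5 is the C ABI encoding of seq_cst produced by toCABI(). If the size
// or alignment rules out a sized call, the generic __atomic_load form with a
// temporary result slot is used instead (see expandAtomicOpToLibcall below).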
1797
1798void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
1799 static const RTLIB::Libcall Libcalls[6] = {
1800 RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1801 RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1802 unsigned Size = getAtomicOpSize(I);
1803
1804 bool expanded = expandAtomicOpToLibcall(
1805 I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
1806 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1807 if (!expanded)
1808 handleFailure(*I, "unsupported atomic store");
1809}
1810
1811void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
1812 static const RTLIB::Libcall Libcalls[6] = {
1813 RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1814 RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1815 RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1816 unsigned Size = getAtomicOpSize(I);
1817
1818 bool expanded = expandAtomicOpToLibcall(
1819 I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
1820 I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
1821 Libcalls);
1822 if (!expanded)
1823 handleFailure(*I, "unsupported cmpxchg");
1824}
1825
1826 static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
1827 static const RTLIB::Libcall LibcallsXchg[6] = {
1828 RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1829 RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1830 RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1831 static const RTLIB::Libcall LibcallsAdd[6] = {
1832 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1833 RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1834 RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1835 static const RTLIB::Libcall LibcallsSub[6] = {
1836 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1837 RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1838 RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1839 static const RTLIB::Libcall LibcallsAnd[6] = {
1840 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1841 RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1842 RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1843 static const RTLIB::Libcall LibcallsOr[6] = {
1844 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1845 RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1846 RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1847 static const RTLIB::Libcall LibcallsXor[6] = {
1848 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1849 RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1850 RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1851 static const RTLIB::Libcall LibcallsNand[6] = {
1852 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1853 RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1854 RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1855
1856 switch (Op) {
1857 case AtomicRMWInst::BAD_BINOP:
1858 llvm_unreachable("Should not have BAD_BINOP.");
1859 case AtomicRMWInst::Xchg:
1860 return ArrayRef(LibcallsXchg);
1861 case AtomicRMWInst::Add:
1862 return ArrayRef(LibcallsAdd);
1863 case AtomicRMWInst::Sub:
1864 return ArrayRef(LibcallsSub);
1865 case AtomicRMWInst::And:
1866 return ArrayRef(LibcallsAnd);
1867 case AtomicRMWInst::Or:
1868 return ArrayRef(LibcallsOr);
1869 case AtomicRMWInst::Xor:
1870 return ArrayRef(LibcallsXor);
1871 case AtomicRMWInst::Nand:
1872 return ArrayRef(LibcallsNand);
1873 case AtomicRMWInst::Max:
1874 case AtomicRMWInst::Min:
1875 case AtomicRMWInst::UMax:
1876 case AtomicRMWInst::UMin:
1877 case AtomicRMWInst::FMax:
1878 case AtomicRMWInst::FMin:
1879 case AtomicRMWInst::FMaximum:
1880 case AtomicRMWInst::FMinimum:
1881 case AtomicRMWInst::FAdd:
1882 case AtomicRMWInst::FSub:
1883 case AtomicRMWInst::UIncWrap:
1884 case AtomicRMWInst::UDecWrap:
1885 case AtomicRMWInst::USubCond:
1886 case AtomicRMWInst::USubSat:
1887 // No atomic libcalls are available for these.
1888 return {};
1889 }
1890 llvm_unreachable("Unexpected AtomicRMW operation.");
1891}
1892
1893void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
1894 ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
1895
1896 unsigned Size = getAtomicOpSize(I);
1897
1898 bool Success = false;
1899 if (!Libcalls.empty())
1900 Success = expandAtomicOpToLibcall(
1901 I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
1902 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1903
1904 // The expansion failed: either there were no libcalls at all for
1905 // the operation (min/max), or there were only size-specialized
1906 // libcalls (add/sub/etc) and we needed a generic. So, expand to a
1907 // CAS libcall, via a CAS loop, instead.
1908 if (!Success) {
1909 expandAtomicRMWToCmpXchg(
1910 I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
1911 Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
1912 SyncScope::ID SSID, Value *&Success, Value *&NewLoaded,
1913 Instruction *MetadataSrc) {
1914 // Create the CAS instruction normally...
1915 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
1916 Addr, Loaded, NewVal, Alignment, MemOpOrder,
1917 AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
1918 if (MetadataSrc)
1919 copyMetadataForAtomic(*Pair, *MetadataSrc);
1920
1921 Success = Builder.CreateExtractValue(Pair, 1, "success");
1922 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
1923
1924 // ...and then expand the CAS into a libcall.
1925 expandAtomicCASToLibcall(Pair);
1926 });
1927 }
1928}
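// Illustrative sketch (not from the original file): for an operation with no
// fetch-style libcall, e.g. "atomicrmw max ptr %p, i32 %v seq_cst", the
// fallback above produces a loop of roughly this shape:
//
//   loop:
//     %loaded = phi i32 [ %init, %entry ], [ %new_loaded, %loop ]
//     %new    = select i1 (icmp sgt %loaded, %v), %loaded, %v
//     ; cmpxchg created by the lambda, then immediately expanded into a call
//     ; to __atomic_compare_exchange_4 (or the generic form)
//     br i1 %success, label %end, label %loop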
1929
1930// A helper routine for the above expandAtomic*ToLibcall functions.
1931//
1932// 'Libcalls' contains an array of enum values for the particular
1933// ATOMIC libcalls to be emitted. All of the other arguments besides
1934// 'I' are extracted from the Instruction subclass by the
1935// caller. Depending on the particular call, some will be null.
1936bool AtomicExpandImpl::expandAtomicOpToLibcall(
1937 Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
1938 Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
1939 AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
1940 assert(Libcalls.size() == 6);
1941
1942 LLVMContext &Ctx = I->getContext();
1943 Module *M = I->getModule();
1944 const DataLayout &DL = M->getDataLayout();
1945 IRBuilder<> Builder(I);
1946 IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
1947
1948 bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
1949 Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
1950
1951 const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
1952
1953 // TODO: the "order" argument type is "int", not int32. So
1954 // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
1955 assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
1956 Constant *OrderingVal =
1957 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
1958 Constant *Ordering2Val = nullptr;
1959 if (CASExpected) {
1960 assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
1961 Ordering2Val =
1962 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
1963 }
1964 bool HasResult = I->getType() != Type::getVoidTy(Ctx);
1965
1966 RTLIB::Libcall RTLibType;
1967 if (UseSizedLibcall) {
1968 switch (Size) {
1969 case 1:
1970 RTLibType = Libcalls[1];
1971 break;
1972 case 2:
1973 RTLibType = Libcalls[2];
1974 break;
1975 case 4:
1976 RTLibType = Libcalls[3];
1977 break;
1978 case 8:
1979 RTLibType = Libcalls[4];
1980 break;
1981 case 16:
1982 RTLibType = Libcalls[5];
1983 break;
1984 }
1985 } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
1986 RTLibType = Libcalls[0];
1987 } else {
1988 // Can't use sized function, and there's no generic for this
1989 // operation, so give up.
1990 return false;
1991 }
1992
1993 RTLIB::LibcallImpl LibcallImpl = LibcallLowering->getLibcallImpl(RTLibType);
1994 if (LibcallImpl == RTLIB::Unsupported) {
1995 // This target does not implement the requested atomic libcall so give up.
1996 return false;
1997 }
1998
1999 // Build up the function call. There are two kinds. First, the sized
2000 // variants. These calls are going to be one of the following (with
2001 // N=1,2,4,8,16):
2002 // iN __atomic_load_N(iN *ptr, int ordering)
2003 // void __atomic_store_N(iN *ptr, iN val, int ordering)
2004 // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
2005 // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
2006 // int success_order, int failure_order)
2007 //
2008 // Note that these functions can be used for non-integer atomic
2009 // operations, the values just need to be bitcast to integers on the
2010 // way in and out.
2011 //
2012 // And, then, the generic variants. They look like the following:
2013 // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
2014 // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
2015 // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
2016 // int ordering)
2017 // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
2018 // void *desired, int success_order,
2019 // int failure_order)
2020 //
2021 // The different signatures are built up depending on the
2022 // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
2023 // variables.
2024
2025 AllocaInst *AllocaCASExpected = nullptr;
2026 AllocaInst *AllocaValue = nullptr;
2027 AllocaInst *AllocaResult = nullptr;
2028
2029 Type *ResultTy;
2030 SmallVector<Value *, 6> Args;
2031 AttributeList Attr;
2032
2033 // 'size' argument.
2034 if (!UseSizedLibcall) {
2035 // Note, getIntPtrType is assumed equivalent to size_t.
2036 Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
2037 }
2038
2039 // 'ptr' argument.
2040 // note: This assumes all address spaces share a common libfunc
2041 // implementation and that addresses are convertible. For systems without
2042 // that property, we'd need to extend this mechanism to support AS-specific
2043 // families of atomic intrinsics.
2044 Value *PtrVal = PointerOperand;
2045 PtrVal = Builder.CreateAddrSpaceCast(PtrVal, PointerType::getUnqual(Ctx));
2046 Args.push_back(PtrVal);
2047
2048 // 'expected' argument, if present.
2049 if (CASExpected) {
2050 AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
2051 AllocaCASExpected->setAlignment(AllocaAlignment);
2052 Builder.CreateLifetimeStart(AllocaCASExpected);
2053 Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
2054 Args.push_back(AllocaCASExpected);
2055 }
2056
2057 // 'val' argument ('desired' for cas), if present.
2058 if (ValueOperand) {
2059 if (UseSizedLibcall) {
2060 Value *IntValue =
2061 Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
2062 Args.push_back(IntValue);
2063 } else {
2064 AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
2065 AllocaValue->setAlignment(AllocaAlignment);
2066 Builder.CreateLifetimeStart(AllocaValue);
2067 Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
2068 Args.push_back(AllocaValue);
2069 }
2070 }
2071
2072 // 'ret' argument.
2073 if (!CASExpected && HasResult && !UseSizedLibcall) {
2074 AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
2075 AllocaResult->setAlignment(AllocaAlignment);
2076 Builder.CreateLifetimeStart(AllocaResult);
2077 Args.push_back(AllocaResult);
2078 }
2079
2080 // 'ordering' ('success_order' for cas) argument.
2081 Args.push_back(OrderingVal);
2082
2083 // 'failure_order' argument, if present.
2084 if (Ordering2Val)
2085 Args.push_back(Ordering2Val);
2086
2087 // Now, the return type.
2088 if (CASExpected) {
2089 ResultTy = Type::getInt1Ty(Ctx);
2090 Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
2091 } else if (HasResult && UseSizedLibcall)
2092 ResultTy = SizedIntTy;
2093 else
2094 ResultTy = Type::getVoidTy(Ctx);
2095
2096 // Done with setting up arguments and return types, create the call:
2097 SmallVector<Type *, 6> ArgTys;
2098 for (Value *Arg : Args)
2099 ArgTys.push_back(Arg->getType());
2100 FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
2101 FunctionCallee LibcallFn = M->getOrInsertFunction(
2102 RTLIB::RuntimeLibcallsInfo::getLibcallImplName(LibcallImpl), FnType,
2103 Attr);
2104 CallInst *Call = Builder.CreateCall(LibcallFn, Args);
2105 Call->setAttributes(Attr);
2106 Value *Result = Call;
2107
2108 // And then, extract the results...
2109 if (ValueOperand && !UseSizedLibcall)
2110 Builder.CreateLifetimeEnd(AllocaValue);
2111
2112 if (CASExpected) {
2113 // The final result from the CAS is {load of 'expected' alloca, bool result
2114 // from call}
2115 Type *FinalResultTy = I->getType();
2116 Value *V = PoisonValue::get(FinalResultTy);
2117 Value *ExpectedOut = Builder.CreateAlignedLoad(
2118 CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
2119 Builder.CreateLifetimeEnd(AllocaCASExpected);
2120 V = Builder.CreateInsertValue(V, ExpectedOut, 0);
2121 V = Builder.CreateInsertValue(V, Result, 1);
2122 I->replaceAllUsesWith(V);
2123 } else if (HasResult) {
2124 Value *V;
2125 if (UseSizedLibcall)
2126 V = Builder.CreateBitOrPointerCast(Result, I->getType());
2127 else {
2128 V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
2129 AllocaAlignment);
2130 Builder.CreateLifetimeEnd(AllocaResult);
2131 }
2132 I->replaceAllUsesWith(V);
2133 }
2134 I->eraseFromParent();
2135 return true;
2136}
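// Illustrative example (not from the original file): for a cmpxchg whose size
// or alignment rules out a sized libcall, the routine above places the
// 'expected' and 'desired' operands in stack slots and emits roughly:
//
//   %exp.addr = alloca i128
//   store i128 %expected, ptr %exp.addr
//   %ok = call i1 @__atomic_compare_exchange(i64 16, ptr %p, ptr %exp.addr,
//                                            ptr %new.addr, i32 5, i32 5)
//   %out = load i128, ptr %exp.addr          ; value observed in memory
//
// It then rebuilds the { i128, i1 } result expected by the original
// instruction from %out and %ok.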