LLVM 23.0.0git
AtomicExpandPass.cpp
Go to the documentation of this file.
1//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass (at IR level) to replace atomic instructions with
10// __atomic_* library calls, or target specific instruction which implement the
11// same semantics in a way which better fits the target backend. This can
12// include the use of (intrinsic-based) load-linked/store-conditional loops,
13// AtomicCmpXchg, or type coercions.
14//
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/ArrayRef.h"
27#include "llvm/IR/Attributes.h"
28#include "llvm/IR/BasicBlock.h"
29#include "llvm/IR/Constant.h"
30#include "llvm/IR/Constants.h"
31#include "llvm/IR/DataLayout.h"
33#include "llvm/IR/Function.h"
34#include "llvm/IR/IRBuilder.h"
35#include "llvm/IR/Instruction.h"
37#include "llvm/IR/MDBuilder.h"
39#include "llvm/IR/Module.h"
41#include "llvm/IR/Type.h"
42#include "llvm/IR/User.h"
43#include "llvm/IR/Value.h"
45#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
53#include <cassert>
54#include <cstdint>
55#include <iterator>
56
57using namespace llvm;
58
59#define DEBUG_TYPE "atomic-expand"
60
61namespace {
62
/// Implementation shared by the legacy- and new-pass-manager wrappers.
/// Walks a function and rewrites each atomic instruction according to the
/// expansion strategy the target requests via TargetLowering hooks.
class AtomicExpandImpl {
  // Target hooks deciding how (and whether) each atomic is expanded.
  const TargetLowering *TLI = nullptr;
  // Lowering info for the __atomic_* libcall fallback path.
  const LibcallLoweringInfo *LibcallLowering = nullptr;
  const DataLayout *DL = nullptr;

private:
  /// Callback type for emitting a cmpxchg instruction during RMW expansion.
  /// Parameters: (Builder, Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
  /// SSID, IsVolatile, /* OUT */ Success, /* OUT */ NewLoaded,
  /// MetadataSrc)
  // NOTE(review): the parameter list of this function_ref appears truncated
  // in this copy — restore the continuation from upstream.
  using CreateCmpXchgInstFun = function_ref<void(

  // Report an unexpandable atomic: emit a context error, RAUW with poison
  // so the IR stays well-formed, and delete the offending instruction.
  void handleFailure(Instruction &FailedInst, const Twine &Msg) const {
    LLVMContext &Ctx = FailedInst.getContext();

    // TODO: Do not use generic error type.
    Ctx.emitError(&FailedInst, Msg);

    if (!FailedInst.getType()->isVoidTy())
      FailedInst.replaceAllUsesWith(PoisonValue::get(FailedInst.getType()));
    FailedInst.eraseFromParent();
  }

  bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
  IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
  LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
  bool tryExpandAtomicLoad(LoadInst *LI);
  bool expandAtomicLoadToLL(LoadInst *LI);
  bool expandAtomicLoadToCmpXchg(LoadInst *LI);
  StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
  bool tryExpandAtomicStore(StoreInst *SI);
  void expandAtomicStoreToXChg(StoreInst *SI);
  bool tryExpandAtomicRMW(AtomicRMWInst *AI);
  AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
  Value *
  insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
                    Align AddrAlign, AtomicOrdering MemOpOrder,
                    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
  void expandAtomicOpToLLSC(
      Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
      AtomicOrdering MemOpOrder,
      function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
  // NOTE(review): this declaration's parameter list appears truncated in
  // this copy — restore the continuation from upstream.
  void expandPartwordAtomicRMW(
  AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
  bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
  void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
  void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);

  AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
  static Value *insertRMWCmpXchgLoop(
      IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
      AtomicOrdering MemOpOrder, SyncScope::ID SSID,
      function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
      CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc);
  bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);

  bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
  bool isIdempotentRMW(AtomicRMWInst *RMWI);
  bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);

  // __atomic_* libcall fallback for operations the target cannot lower.
  bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
                               Value *PointerOperand, Value *ValueOperand,
                               Value *CASExpected, AtomicOrdering Ordering,
                               AtomicOrdering Ordering2,
                               ArrayRef<RTLIB::Libcall> Libcalls);
  void expandAtomicLoadToLibcall(LoadInst *LI);
  void expandAtomicStoreToLibcall(StoreInst *LI);
  void expandAtomicRMWToLibcall(AtomicRMWInst *I);
  void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);

  bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
                                CreateCmpXchgInstFun CreateCmpXchg);

  // Expand a single instruction; returns true if anything changed.
  bool processAtomicInstr(Instruction *I);

public:
  // Entry point: expand all atomics in F for the given target machine.
  bool run(Function &F,
           const LibcallLoweringModuleAnalysisResult &LibcallResult,
           const TargetMachine *TM);
};
146
/// Legacy pass-manager wrapper around AtomicExpandImpl.
class AtomicExpandLegacy : public FunctionPass {
public:
  static char ID; // Pass identification, replacement for typeid

  AtomicExpandLegacy() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // NOTE(review): the body appears truncated in this copy; runOnFunction
    // calls getAnalysis<LibcallLoweringInfoWrapper>(), so an addRequired of
    // that wrapper presumably belongs here — restore from upstream.
  }

  bool runOnFunction(Function &F) override;
};
160
161// IRBuilder to be used for replacement atomic instructions.
// IRBuilder to be used for replacement atomic instructions.
struct ReplacementIRBuilder
    : IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> {
  // MMRA metadata taken from the instruction being replaced; re-attached to
  // every instruction this builder creates (via the insertion callback).
  MDNode *MMRAMD = nullptr;

  // Preserves the DebugLoc from I, and preserves still valid metadata.
  // Enable StrictFP builder mode when appropriate.
  // NOTE(review): the initializer list appears truncated in this copy — the
  // IRBuilderCallbackInserter(...) wrapper around the lambda is missing.
  explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
      : IRBuilder(I->getContext(), InstSimplifyFolder(DL),
                  [this](Instruction *I) { addMMRAMD(I); })) {
    SetInsertPoint(I);
    this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
    if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
      this->setIsFPConstrained(true);

    MMRAMD = I->getMetadata(LLVMContext::MD_mmra);
  }

  // Insertion callback: propagate the captured MMRA metadata.
  // NOTE(review): a guard line (presumably `if (MMRAMD)`) appears to be
  // missing here in this copy — confirm against upstream.
  void addMMRAMD(Instruction *I) {
    I->setMetadata(LLVMContext::MD_mmra, MMRAMD);
  }
};
185
186} // end anonymous namespace
187
188char AtomicExpandLegacy::ID = 0;
189
190char &llvm::AtomicExpandID = AtomicExpandLegacy::ID;
191
193 "Expand Atomic instructions", false, false)
196INITIALIZE_PASS_END(AtomicExpandLegacy, DEBUG_TYPE,
197 "Expand Atomic instructions", false, false)
198
199// Helper functions to retrieve the size of atomic instructions.
200static unsigned getAtomicOpSize(LoadInst *LI) {
201 const DataLayout &DL = LI->getDataLayout();
202 return DL.getTypeStoreSize(LI->getType());
203}
204
205static unsigned getAtomicOpSize(StoreInst *SI) {
206 const DataLayout &DL = SI->getDataLayout();
207 return DL.getTypeStoreSize(SI->getValueOperand()->getType());
208}
209
210static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
211 const DataLayout &DL = RMWI->getDataLayout();
212 return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
213}
214
215static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
216 const DataLayout &DL = CASI->getDataLayout();
217 return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
218}
219
220/// Copy metadata that's safe to preserve when widening atomics.
222 const Instruction &Source) {
224 Source.getAllMetadata(MD);
225 LLVMContext &Ctx = Dest.getContext();
226 MDBuilder MDB(Ctx);
227
228 for (auto [ID, N] : MD) {
229 switch (ID) {
230 case LLVMContext::MD_dbg:
231 case LLVMContext::MD_tbaa:
232 case LLVMContext::MD_tbaa_struct:
233 case LLVMContext::MD_alias_scope:
234 case LLVMContext::MD_noalias:
235 case LLVMContext::MD_noalias_addrspace:
236 case LLVMContext::MD_access_group:
237 case LLVMContext::MD_mmra:
238 Dest.setMetadata(ID, N);
239 break;
240 default:
241 if (ID == Ctx.getMDKindID("amdgpu.no.remote.memory"))
242 Dest.setMetadata(ID, N);
243 else if (ID == Ctx.getMDKindID("amdgpu.no.fine.grained.memory"))
244 Dest.setMetadata(ID, N);
245
246 // Losing amdgpu.ignore.denormal.mode, but it doesn't matter for current
247 // uses.
248 break;
249 }
250 }
251}
252
253// Determine if a particular atomic operation has a supported size,
254// and is of appropriate alignment, to be passed through for target
255// lowering. (Versus turning into a __atomic libcall)
256template <typename Inst>
257static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
258 unsigned Size = getAtomicOpSize(I);
259 Align Alignment = I->getAlign();
260 return Alignment >= Size &&
262}
263
/// Rewrite a single atomic instruction: libcall fallback for unsupported
/// size/alignment, optional cast-to-integer canonicalization, explicit fence
/// insertion when the target asks for it, and finally dispatch to the
/// per-opcode expansion routine. Returns true if anything changed.
bool AtomicExpandImpl::processAtomicInstr(Instruction *I) {
  // Exactly one of these is non-null for an atomic instruction.
  auto *LI = dyn_cast<LoadInst>(I);
  auto *SI = dyn_cast<StoreInst>(I);
  auto *RMWI = dyn_cast<AtomicRMWInst>(I);
  auto *CASI = dyn_cast<AtomicCmpXchgInst>(I);

  bool MadeChange = false;

  // If the Size/Alignment is not supported, replace with a libcall.
  if (LI) {
    if (!LI->isAtomic())
      return false;

    if (!atomicSizeSupported(TLI, LI)) {
      expandAtomicLoadToLibcall(LI);
      return true;
    }

    // Canonicalize FP/vector/pointer atomics to integer if requested;
    // note I is rebound so the fence logic below sees the new instruction.
    if (TLI->shouldCastAtomicLoadInIR(LI) ==
        TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = LI = convertAtomicLoadToIntegerType(LI);
      MadeChange = true;
    }
  } else if (SI) {
    if (!SI->isAtomic())
      return false;

    if (!atomicSizeSupported(TLI, SI)) {
      expandAtomicStoreToLibcall(SI);
      return true;
    }

    if (TLI->shouldCastAtomicStoreInIR(SI) ==
        TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = SI = convertAtomicStoreToIntegerType(SI);
      MadeChange = true;
    }
  } else if (RMWI) {
    if (!atomicSizeSupported(TLI, RMWI)) {
      expandAtomicRMWToLibcall(RMWI);
      return true;
    }

    if (TLI->shouldCastAtomicRMWIInIR(RMWI) ==
        TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = RMWI = convertAtomicXchgToIntegerType(RMWI);
      MadeChange = true;
    }
  } else if (CASI) {
    if (!atomicSizeSupported(TLI, CASI)) {
      expandAtomicCASToLibcall(CASI);
      return true;
    }

    // TODO: when we're ready to make the change at the IR level, we can
    // extend convertCmpXchgToInteger for floating point too.
    if (CASI->getCompareOperand()->getType()->isPointerTy()) {
      // TODO: add a TLI hook to control this so that each target can
      // convert to lowering the original type one at a time.
      I = CASI = convertCmpXchgToIntegerType(CASI);
      MadeChange = true;
    }
  } else
    return false;

  if (TLI->shouldInsertFencesForAtomic(I)) {
    // Split the ordering off the memory operation onto explicit fences,
    // weakening the operation itself to monotonic.
    auto FenceOrdering = AtomicOrdering::Monotonic;
    if (LI && isAcquireOrStronger(LI->getOrdering())) {
      FenceOrdering = LI->getOrdering();
      LI->setOrdering(AtomicOrdering::Monotonic);
    } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
      FenceOrdering = SI->getOrdering();
      SI->setOrdering(AtomicOrdering::Monotonic);
    } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
                        isAcquireOrStronger(RMWI->getOrdering()))) {
      FenceOrdering = RMWI->getOrdering();
      RMWI->setOrdering(TLI->atomicOperationOrderAfterFenceSplit(RMWI));
    // NOTE(review): a line is missing here in this copy — the condition head
    // (presumably `TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==`) was lost in
    // extraction; restore from upstream.
    } else if (CASI &&
                   TargetLoweringBase::AtomicExpansionKind::None &&
               (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
                isAcquireOrStronger(CASI->getSuccessOrdering()) ||
                isAcquireOrStronger(CASI->getFailureOrdering()))) {
      // If a compare and swap is lowered to LL/SC, we can do smarter fence
      // insertion, with a stronger one on the success path than on the
      // failure path. As a result, fence insertion is directly done by
      // expandAtomicCmpXchg in that case.
      FenceOrdering = CASI->getMergedOrdering();
      auto CASOrdering = TLI->atomicOperationOrderAfterFenceSplit(CASI);

      CASI->setSuccessOrdering(CASOrdering);
      CASI->setFailureOrdering(CASOrdering);
    }

    if (FenceOrdering != AtomicOrdering::Monotonic) {
      MadeChange |= bracketInstWithFences(I, FenceOrdering);
    }
  // NOTE(review): the head of this `else if` was lost in extraction
  // (presumably a shouldInsertTrailingFenceForAtomicStore(I) check);
  // restore from upstream.
      !(CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
                    TargetLoweringBase::AtomicExpansionKind::LLSC)) {
    // CmpXchg LLSC is handled in expandAtomicCmpXchg().
    IRBuilder Builder(I);
    if (auto TrailingFence = TLI->emitTrailingFence(
            Builder, I, AtomicOrdering::SequentiallyConsistent)) {
      TrailingFence->moveAfter(I);
      MadeChange = true;
    }
  }

  // Ordering handled; now lower the operation itself.
  if (LI)
    MadeChange |= tryExpandAtomicLoad(LI);
  else if (SI)
    MadeChange |= tryExpandAtomicStore(SI);
  else if (RMWI) {
    // There are two different ways of expanding RMW instructions:
    // - into a load if it is idempotent
    // - into a Cmpxchg/LL-SC loop otherwise
    // we try them in that order.

    if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
      MadeChange = true;

    } else {
      MadeChange |= tryExpandAtomicRMW(RMWI);
    }
  } else if (CASI)
    MadeChange |= tryExpandAtomicCmpXchg(CASI);

  return MadeChange;
}
394
395bool AtomicExpandImpl::run(
396 Function &F, const LibcallLoweringModuleAnalysisResult &LibcallResult,
397 const TargetMachine *TM) {
398 const auto *Subtarget = TM->getSubtargetImpl(F);
399 if (!Subtarget->enableAtomicExpand())
400 return false;
401 TLI = Subtarget->getTargetLowering();
402 LibcallLowering = &LibcallResult.getLibcallLowering(*Subtarget);
403 DL = &F.getDataLayout();
404
405 bool MadeChange = false;
406
407 for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
408 BasicBlock *BB = &*BBI;
409
411
412 for (BasicBlock::reverse_iterator I = BB->rbegin(), E = BB->rend(); I != E;
413 I = Next) {
414 Instruction &Inst = *I;
415 Next = std::next(I);
416
417 if (processAtomicInstr(&Inst)) {
418 MadeChange = true;
419
420 // New blocks may have been inserted.
421 BBE = F.end();
422 }
423 }
424 }
425
426 return MadeChange;
427}
428
429bool AtomicExpandLegacy::runOnFunction(Function &F) {
430
431 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
432 if (!TPC)
433 return false;
434 auto *TM = &TPC->getTM<TargetMachine>();
435
436 const LibcallLoweringModuleAnalysisResult &LibcallResult =
437 getAnalysis<LibcallLoweringInfoWrapper>().getResult(*F.getParent());
438 AtomicExpandImpl AE;
439 return AE.run(F, LibcallResult, TM);
440}
441
443 return new AtomicExpandLegacy();
444}
445
448 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
449
450 const LibcallLoweringModuleAnalysisResult *LibcallResult =
451 MAMProxy.getCachedResult<LibcallLoweringModuleAnalysis>(*F.getParent());
452
453 if (!LibcallResult) {
454 F.getContext().emitError("'" + LibcallLoweringModuleAnalysis::name() +
455 "' analysis required");
456 return PreservedAnalyses::all();
457 }
458
459 AtomicExpandImpl AE;
460
461 bool Changed = AE.run(F, *LibcallResult, TM);
462 if (!Changed)
463 return PreservedAnalyses::all();
464
466}
467
468bool AtomicExpandImpl::bracketInstWithFences(Instruction *I,
469 AtomicOrdering Order) {
470 ReplacementIRBuilder Builder(I, *DL);
471
472 auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
473
474 auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
475 // We have a guard here because not every atomic operation generates a
476 // trailing fence.
477 if (TrailingFence)
478 TrailingFence->moveAfter(I);
479
480 return (LeadingFence || TrailingFence);
481}
482
483/// Get the iX type with the same bitwidth as T.
485AtomicExpandImpl::getCorrespondingIntegerType(Type *T, const DataLayout &DL) {
486 EVT VT = TLI->getMemValueType(DL, T);
487 unsigned BitWidth = VT.getStoreSizeInBits();
488 assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
489 return IntegerType::get(T->getContext(), BitWidth);
490}
491
492/// Convert an atomic load of a non-integral type to an integer load of the
493/// equivalent bitwidth. See the function comment on
494/// convertAtomicStoreToIntegerType for background.
495LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) {
496 auto *M = LI->getModule();
497 Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());
498
499 ReplacementIRBuilder Builder(LI, *DL);
500
501 Value *Addr = LI->getPointerOperand();
502
503 auto *NewLI = Builder.CreateLoad(NewTy, Addr);
504 NewLI->setAlignment(LI->getAlign());
505 NewLI->setVolatile(LI->isVolatile());
506 NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
507 LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
508
509 Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
510 LI->replaceAllUsesWith(NewVal);
511 LI->eraseFromParent();
512 return NewLI;
513}
514
/// Convert an atomicrmw xchg of a non-integral type (pointer/FP) into an
/// xchg of the same-width integer type, casting the operand in and the
/// result back out. Returns the new instruction.
AtomicRMWInst *
AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
  // NOTE(review): a line is missing here in this copy (likely an assert that
  // the operation is Xchg) — confirm against upstream.

  auto *M = RMWI->getModule();
  Type *NewTy =
      getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());

  ReplacementIRBuilder Builder(RMWI, *DL);

  Value *Addr = RMWI->getPointerOperand();
  Value *Val = RMWI->getValOperand();
  // Pointers need ptrtoint; FP/vector values bitcast losslessly.
  Value *NewVal = Val->getType()->isPointerTy()
                      ? Builder.CreatePtrToInt(Val, NewTy)
                      : Builder.CreateBitCast(Val, NewTy);

  auto *NewRMWI = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Addr, NewVal,
                                          RMWI->getAlign(), RMWI->getOrdering(),
                                          RMWI->getSyncScopeID());
  NewRMWI->setVolatile(RMWI->isVolatile());
  copyMetadataForAtomic(*NewRMWI, *RMWI);
  LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");

  // Cast the result back to the original type for existing users.
  Value *NewRVal = RMWI->getType()->isPointerTy()
                       ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
                       : Builder.CreateBitCast(NewRMWI, RMWI->getType());
  RMWI->replaceAllUsesWith(NewRVal);
  RMWI->eraseFromParent();
  return NewRMWI;
}
545
546bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
547 switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
548 case TargetLoweringBase::AtomicExpansionKind::None:
549 return false;
550 case TargetLoweringBase::AtomicExpansionKind::LLSC:
551 expandAtomicOpToLLSC(
552 LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
553 LI->getOrdering(),
554 [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
555 return true;
556 case TargetLoweringBase::AtomicExpansionKind::LLOnly:
557 return expandAtomicLoadToLL(LI);
558 case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
559 return expandAtomicLoadToCmpXchg(LI);
560 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
561 LI->setAtomic(AtomicOrdering::NotAtomic);
562 return true;
563 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
564 TLI->emitExpandAtomicLoad(LI);
565 return true;
566 default:
567 llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
568 }
569}
570
571bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
572 switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
573 case TargetLoweringBase::AtomicExpansionKind::None:
574 return false;
575 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
576 TLI->emitExpandAtomicStore(SI);
577 return true;
578 case TargetLoweringBase::AtomicExpansionKind::Expand:
579 expandAtomicStoreToXChg(SI);
580 return true;
581 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
582 SI->setAtomic(AtomicOrdering::NotAtomic);
583 return true;
584 default:
585 llvm_unreachable("Unhandled case in tryExpandAtomicStore");
586 }
587}
588
589bool AtomicExpandImpl::expandAtomicLoadToLL(LoadInst *LI) {
590 ReplacementIRBuilder Builder(LI, *DL);
591
592 // On some architectures, load-linked instructions are atomic for larger
593 // sizes than normal loads. For example, the only 64-bit load guaranteed
594 // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
595 Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
596 LI->getPointerOperand(), LI->getOrdering());
598
599 LI->replaceAllUsesWith(Val);
600 LI->eraseFromParent();
601
602 return true;
603}
604
605bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
606 ReplacementIRBuilder Builder(LI, *DL);
607 AtomicOrdering Order = LI->getOrdering();
608 if (Order == AtomicOrdering::Unordered)
609 Order = AtomicOrdering::Monotonic;
610
611 Value *Addr = LI->getPointerOperand();
612 Type *Ty = LI->getType();
613 Constant *DummyVal = Constant::getNullValue(Ty);
614
615 Value *Pair = Builder.CreateAtomicCmpXchg(
616 Addr, DummyVal, DummyVal, LI->getAlign(), Order,
618 Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
619
620 LI->replaceAllUsesWith(Loaded);
621 LI->eraseFromParent();
622
623 return true;
624}
625
626/// Convert an atomic store of a non-integral type to an integer store of the
627/// equivalent bitwidth. We used to not support floating point or vector
628/// atomics in the IR at all. The backends learned to deal with the bitcast
629/// idiom because that was the only way of expressing the notion of a atomic
630/// float or vector store. The long term plan is to teach each backend to
631/// instruction select from the original atomic store, but as a migration
632/// mechanism, we convert back to the old format which the backends understand.
633/// Each backend will need individual work to recognize the new format.
634StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
635 ReplacementIRBuilder Builder(SI, *DL);
636 auto *M = SI->getModule();
637 Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
638 M->getDataLayout());
639 Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
640
641 Value *Addr = SI->getPointerOperand();
642
643 StoreInst *NewSI = Builder.CreateStore(NewVal, Addr);
644 NewSI->setAlignment(SI->getAlign());
645 NewSI->setVolatile(SI->isVolatile());
646 NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
647 LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
648 SI->eraseFromParent();
649 return NewSI;
650}
651
652void AtomicExpandImpl::expandAtomicStoreToXChg(StoreInst *SI) {
653 // This function is only called on atomic stores that are too large to be
654 // atomic if implemented as a native store. So we replace them by an
655 // atomic swap, that can be implemented for example as a ldrex/strex on ARM
656 // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
657 // It is the responsibility of the target to only signal expansion via
658 // shouldExpandAtomicRMW in cases where this is required and possible.
659 ReplacementIRBuilder Builder(SI, *DL);
660 AtomicOrdering Ordering = SI->getOrdering();
661 assert(Ordering != AtomicOrdering::NotAtomic);
662 AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
663 ? AtomicOrdering::Monotonic
664 : Ordering;
665 AtomicRMWInst *AI = Builder.CreateAtomicRMW(
666 AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
667 SI->getAlign(), RMWOrdering);
668 SI->eraseFromParent();
669
670 // Now we have an appropriate swap instruction, lower it as usual.
671 tryExpandAtomicRMW(AI);
672}
673
674static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
675 Value *Loaded, Value *NewVal, Align AddrAlign,
676 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
677 Value *&Success, Value *&NewLoaded,
678 Instruction *MetadataSrc) {
679 Type *OrigTy = NewVal->getType();
680
681 // This code can go away when cmpxchg supports FP and vector types.
682 assert(!OrigTy->isPointerTy());
683 bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isVectorTy();
684 if (NeedBitcast) {
685 IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
686 NewVal = Builder.CreateBitCast(NewVal, IntTy);
687 Loaded = Builder.CreateBitCast(Loaded, IntTy);
688 }
689
690 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
691 Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
693 if (MetadataSrc)
694 copyMetadataForAtomic(*Pair, *MetadataSrc);
695
696 Success = Builder.CreateExtractValue(Pair, 1, "success");
697 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
698
699 if (NeedBitcast)
700 NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
701}
702
703bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
704 LLVMContext &Ctx = AI->getModule()->getContext();
705 TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
706 switch (Kind) {
707 case TargetLoweringBase::AtomicExpansionKind::None:
708 return false;
709 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
710 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
711 unsigned ValueSize = getAtomicOpSize(AI);
712 if (ValueSize < MinCASSize) {
713 expandPartwordAtomicRMW(AI,
714 TargetLoweringBase::AtomicExpansionKind::LLSC);
715 } else {
716 auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
717 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
718 AI->getValOperand());
719 };
720 expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
721 AI->getAlign(), AI->getOrdering(), PerformOp);
722 }
723 return true;
724 }
725 case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
726 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
727 unsigned ValueSize = getAtomicOpSize(AI);
728 if (ValueSize < MinCASSize) {
729 expandPartwordAtomicRMW(AI,
730 TargetLoweringBase::AtomicExpansionKind::CmpXChg);
731 } else {
733 Ctx.getSyncScopeNames(SSNs);
734 auto MemScope = SSNs[AI->getSyncScopeID()].empty()
735 ? "system"
736 : SSNs[AI->getSyncScopeID()];
737 OptimizationRemarkEmitter ORE(AI->getFunction());
738 ORE.emit([&]() {
739 return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
740 << "A compare and swap loop was generated for an atomic "
741 << AI->getOperationName(AI->getOperation()) << " operation at "
742 << MemScope << " memory scope";
743 });
744 expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
745 }
746 return true;
747 }
748 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
749 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
750 unsigned ValueSize = getAtomicOpSize(AI);
751 if (ValueSize < MinCASSize) {
753 // Widen And/Or/Xor and give the target another chance at expanding it.
756 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
757 return true;
758 }
759 }
760 expandAtomicRMWToMaskedIntrinsic(AI);
761 return true;
762 }
763 case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
765 return true;
766 }
767 case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: {
769 return true;
770 }
771 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
772 return lowerAtomicRMWInst(AI);
773 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
774 TLI->emitExpandAtomicRMW(AI);
775 return true;
776 default:
777 llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
778 }
779}
780
781namespace {
782
/// Bundle of values produced by createMaskInstrs describing how a sub-word
/// atomic access maps onto a full aligned memory word.
struct PartwordMaskValues {
  // These three fields are guaranteed to be set by createMaskInstrs.
  Type *WordType = nullptr;      // the wide (word-sized) integer type
  Type *ValueType = nullptr;     // the original accessed type
  Type *IntValueType = nullptr;  // ValueType as a same-width integer
  Value *AlignedAddr = nullptr;  // address rounded down to word alignment
  Align AlignedAddrAlignment;
  // The remaining fields can be null.
  Value *ShiftAmt = nullptr;     // bit offset of the value within the word
  Value *Mask = nullptr;         // bits of the word belonging to the value
  Value *Inv_Mask = nullptr;     // complement of Mask
};
795
// Debug-dump helper for PartwordMaskValues; each field prints its IR value
// or "nullptr" when unset. Kept [[maybe_unused]] since it is only referenced
// from debug code.
[[maybe_unused]]
raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
  // Print an IR object or a placeholder, followed by a newline.
  auto PrintObj = [&O](auto *V) {
    if (V)
      O << *V;
    else
      O << "nullptr";
    O << '\n';
  };
  O << "PartwordMaskValues {\n";
  O << " WordType: ";
  PrintObj(PMV.WordType);
  O << " ValueType: ";
  PrintObj(PMV.ValueType);
  O << " AlignedAddr: ";
  PrintObj(PMV.AlignedAddr);
  O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
  O << " ShiftAmt: ";
  PrintObj(PMV.ShiftAmt);
  O << " Mask: ";
  PrintObj(PMV.Mask);
  O << " Inv_Mask: ";
  PrintObj(PMV.Inv_Mask);
  O << "}\n";
  return O;
}
822
823} // end anonymous namespace
824
825/// This is a helper function which builds instructions to provide
826/// values necessary for partword atomic operations. It takes an
827/// incoming address, Addr, and ValueType, and constructs the address,
828/// shift-amounts and masks needed to work with a larger value of size
829/// WordSize.
830///
831/// AlignedAddr: Addr rounded down to a multiple of WordSize
832///
833/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
834/// from AlignAddr for it to have the same value as if
835/// ValueType was loaded from Addr.
836///
837/// Mask: Value to mask with the value loaded from AlignAddr to
838/// include only the part that would've been loaded from Addr.
839///
840/// Inv_Mask: The inverse of Mask.
841static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
843 Value *Addr, Align AddrAlign,
844 unsigned MinWordSize) {
845 PartwordMaskValues PMV;
846
847 Module *M = I->getModule();
848 LLVMContext &Ctx = M->getContext();
849 const DataLayout &DL = M->getDataLayout();
850 unsigned ValueSize = DL.getTypeStoreSize(ValueType);
851
852 PMV.ValueType = PMV.IntValueType = ValueType;
853 if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy())
854 PMV.IntValueType =
855 Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());
856
857 PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
858 : ValueType;
859 if (PMV.ValueType == PMV.WordType) {
860 PMV.AlignedAddr = Addr;
861 PMV.AlignedAddrAlignment = AddrAlign;
862 PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
863 PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
864 return PMV;
865 }
866
867 PMV.AlignedAddrAlignment = Align(MinWordSize);
868
869 assert(ValueSize < MinWordSize);
870
871 PointerType *PtrTy = cast<PointerType>(Addr->getType());
872 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
873 Value *PtrLSB;
874
875 if (AddrAlign < MinWordSize) {
876 PMV.AlignedAddr = Builder.CreateIntrinsic(
877 Intrinsic::ptrmask, {PtrTy, IntTy},
878 {Addr, ConstantInt::getSigned(IntTy, ~(uint64_t)(MinWordSize - 1))},
879 nullptr, "AlignedAddr");
880
881 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
882 PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
883 } else {
884 // If the alignment is high enough, the LSB are known 0.
885 PMV.AlignedAddr = Addr;
886 PtrLSB = ConstantInt::getNullValue(IntTy);
887 }
888
889 if (DL.isLittleEndian()) {
890 // turn bytes into bits
891 PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
892 } else {
893 // turn bytes into bits, and count from the other side.
894 PMV.ShiftAmt = Builder.CreateShl(
895 Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
896 }
897
898 PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
899 PMV.Mask = Builder.CreateShl(
900 ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
901 "Mask");
902
903 PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
904
905 return PMV;
906}
907
908static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
909 const PartwordMaskValues &PMV) {
910 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
911 if (PMV.WordType == PMV.ValueType)
912 return WideWord;
913
914 Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
915 Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
916 return Builder.CreateBitCast(Trunc, PMV.ValueType);
917}
918
919static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
920 Value *Updated, const PartwordMaskValues &PMV) {
921 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
922 assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
923 if (PMV.WordType == PMV.ValueType)
924 return Updated;
925
926 Updated = Builder.CreateBitCast(Updated, PMV.IntValueType);
927
928 Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
929 Value *Shift =
930 Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
931 Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
932 Value *Or = Builder.CreateOr(And, Shift, "inserted");
933 return Or;
934}
935
/// Emit IR to implement a masked version of a given atomicrmw
/// operation. (That is, only the bits under the Mask should be
/// affected by the operation)
// NOTE(review): the head of this definition (its name, first parameter, and
// several case labels inside the switch) was lost in extraction — restore
// from upstream before building.
                                    IRBuilderBase &Builder, Value *Loaded,
                                    Value *Shifted_Inc, Value *Inc,
                                    const PartwordMaskValues &PMV) {
  // TODO: update to use
  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
  // to merge bits from two values without requiring PMV.Inv_Mask.
  switch (Op) {
  case AtomicRMWInst::Xchg: {
    // Xchg simply replaces the masked field with the (pre-shifted) operand.
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
    return FinalVal;
  }
  // NOTE(review): the Or/Xor/And case labels were lost in extraction.
    llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
  // NOTE(review): additional case labels (arithmetic ops) were lost here.
  case AtomicRMWInst::Nand: {
    // The other arithmetic ops need to be masked into place.
    Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
    Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
    return FinalVal;
  }
  // NOTE(review): the run of case labels for the remaining operations
  // (min/max, FP ops, etc.) was lost in extraction.
    // Finally, other ops will operate on the full value, so truncate down to
    // the original size, and expand out again after doing the
    // operation. Bitcasts will be inserted for FP values.
    Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
    Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
    Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
    return FinalVal;
  }
  default:
    llvm_unreachable("Unknown atomic op");
  }
}
994
/// Expand a sub-word atomicrmw operation into an appropriate
/// word-sized operation.
///
/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
/// way as a typical atomicrmw expansion. The only difference here is
/// that the operation inside of the loop may operate upon only a
/// part of the value.
void AtomicExpandImpl::expandPartwordAtomicRMW(
    AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
  // Widen And/Or/Xor and give the target another chance at expanding it.
    tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
    return;
  }
  AtomicOrdering MemOpOrder = AI->getOrdering();
  SyncScope::ID SSID = AI->getSyncScopeID();

  ReplacementIRBuilder Builder(AI, *DL);

  // Compute the aligned word address plus the shift amount and masks that
  // locate this sub-word field within it.
  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  Value *ValOperand_Shifted = nullptr;
    // Move the operand into the field's bit position within the wide word.
    Value *ValOp = Builder.CreateBitCast(AI->getValOperand(), PMV.IntValueType);
    ValOperand_Shifted =
        Builder.CreateShl(Builder.CreateZExt(ValOp, PMV.WordType), PMV.ShiftAmt,
                          "ValOperand_Shifted");
  }

  // Loop body: apply the masked operation to the currently-loaded wide word.
  auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
    return performMaskedAtomicOp(Op, Builder, Loaded, ValOperand_Shifted,
                                 AI->getValOperand(), PMV);
  };

  Value *OldResult;
  if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
    OldResult = insertRMWCmpXchgLoop(
        Builder, PMV.WordType, PMV.AlignedAddr, PMV.AlignedAddrAlignment,
        MemOpOrder, SSID, PerformPartwordOp, createCmpXchgInstFun, AI);
  } else {
    assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
    OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
                                  PMV.AlignedAddrAlignment, MemOpOrder,
                                  PerformPartwordOp);
  }

  // The loop yields the full word; extract the sub-word result for the
  // original instruction's users.
  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
}
1050
// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
// No loop is needed: the untouched bits of the wide word are preserved by
// choosing an operand that is the identity for them.
AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
  ReplacementIRBuilder Builder(AI, *DL);

          Op == AtomicRMWInst::And) &&
         "Unable to widen operation");

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // Zero-extend and position the operand over the field; the bits outside
  // the field are zero after the zext/shl.
  Value *ValOperand_Shifted =
      Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
                        PMV.ShiftAmt, "ValOperand_Shifted");

  Value *NewOperand;

  // For 'and', the bits outside the field must be 1 (ORed with Inv_Mask) so
  // they are preserved; zeros already preserve them for 'or'/'xor'.
  if (Op == AtomicRMWInst::And)
    NewOperand =
        Builder.CreateOr(ValOperand_Shifted, PMV.Inv_Mask, "AndOperand");
  else
    NewOperand = ValOperand_Shifted;

  AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
      Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
      AI->getOrdering(), AI->getSyncScopeID());

  copyMetadataForAtomic(*NewAI, *AI);

  // Users of the original instruction expect the sub-word old value.
  Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
  return NewAI;
}
1087
bool AtomicExpandImpl::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
  // The basic idea here is that we're expanding a cmpxchg of a
  // smaller memory size up to a word-sized cmpxchg. To do this, we
  // need to add a retry-loop for strong cmpxchg, so that
  // modifications to other parts of the word don't cause a spurious
  // failure.

  // This generates code like the following:
  //     [[Setup mask values PMV.*]]
  //     %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
  //     %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
  //     %InitLoaded = load i32* %addr
  //     %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
  //     br partword.cmpxchg.loop
  // partword.cmpxchg.loop:
  //     %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
  //        [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
  //     %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
  //     %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
  //     %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
  //        i32 %FullWord_NewVal success_ordering failure_ordering
  //     %OldVal = extractvalue { i32, i1 } %NewCI, 0
  //     %Success = extractvalue { i32, i1 } %NewCI, 1
  //     br i1 %Success, label %partword.cmpxchg.end,
  //        label %partword.cmpxchg.failure
  // partword.cmpxchg.failure:
  //     %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
  //     %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
  //     br i1 %ShouldContinue, label %partword.cmpxchg.loop,
  //        label %partword.cmpxchg.end
  // partword.cmpxchg.end:
  //     %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
  //     %FinalOldVal = trunc i32 %tmp1 to i8
  //     %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
  //     %Res = insertvalue { i8, i1 } %25, i1 %Success, 1

  Value *Addr = CI->getPointerOperand();
  Value *Cmp = CI->getCompareOperand();
  Value *NewVal = CI->getNewValOperand();

  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  ReplacementIRBuilder Builder(CI, *DL);
  LLVMContext &Ctx = Builder.getContext();

  BasicBlock *EndBB =
      BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
  auto FailureBB =
      BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
  auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);

  // The split call above "helpfully" added a branch at the end of BB
  // (to the wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
                       CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // Shift the incoming values over, into the right location in the word.
  Value *NewVal_Shifted =
      Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
  Value *Cmp_Shifted =
      Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);

  // Load the entire current word, and mask into place the expected and new
  // values
  LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
  InitLoaded->setVolatile(CI->isVolatile());
  Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
  Builder.CreateBr(LoopBB);

  // partword.cmpxchg.loop:
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
  Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);

  // Mask/Or the expected and new values into place in the loaded word.
  Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
  Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
  AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
      PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
  NewCI->setVolatile(CI->isVolatile());
  // When we're building a strong cmpxchg, we need a loop, so you
  // might think we could use a weak cmpxchg inside. But, using strong
  // allows the below comparison for ShouldContinue, and we're
  // expecting the underlying cmpxchg to be a machine instruction,
  // which is strong anyways.
  NewCI->setWeak(CI->isWeak());

  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
  Value *Success = Builder.CreateExtractValue(NewCI, 1);

  // A weak cmpxchg may fail spuriously, so a single attempt suffices; a
  // strong one falls through to the retry check below on failure.
  if (CI->isWeak())
    Builder.CreateBr(EndBB);
  else
    Builder.CreateCondBr(Success, EndBB, FailureBB);

  // partword.cmpxchg.failure:
  Builder.SetInsertPoint(FailureBB);
  // Upon failure, verify that the masked-out part of the loaded value
  // has been modified. If it didn't, abort the cmpxchg, since the
  // masked-in part must've.
  Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
  Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
  Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);

  // Add the second value to the phi from above
  Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);

  // partword.cmpxchg.end:
  Builder.SetInsertPoint(CI);

  // Extract the sub-word old value and rebuild the { value, success } pair
  // that users of the original cmpxchg expect.
  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
  Value *Res = PoisonValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
  Res = Builder.CreateInsertValue(Res, Success, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return true;
}
1212
1213void AtomicExpandImpl::expandAtomicOpToLLSC(
1214 Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
1215 AtomicOrdering MemOpOrder,
1216 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1217 ReplacementIRBuilder Builder(I, *DL);
1218 Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
1219 MemOpOrder, PerformOp);
1220
1221 I->replaceAllUsesWith(Loaded);
1222 I->eraseFromParent();
1223}
1224
1225void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
1226 ReplacementIRBuilder Builder(AI, *DL);
1227
1228 PartwordMaskValues PMV =
1229 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1230 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1231
1232 // The value operand must be sign-extended for signed min/max so that the
1233 // target's signed comparison instructions can be used. Otherwise, just
1234 // zero-ext.
1235 Instruction::CastOps CastOp = Instruction::ZExt;
1236 AtomicRMWInst::BinOp RMWOp = AI->getOperation();
1237 if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
1238 CastOp = Instruction::SExt;
1239
1240 Value *ValOperand_Shifted = Builder.CreateShl(
1241 Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
1242 PMV.ShiftAmt, "ValOperand_Shifted");
1243 Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
1244 Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
1245 AI->getOrdering());
1246 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1247 AI->replaceAllUsesWith(FinalOldResult);
1248 AI->eraseFromParent();
1249}
1250
// Lower a part-word cmpxchg to a target-provided masked intrinsic operating
// on the containing aligned word.
void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic(
    AtomicCmpXchgInst *CI) {
  ReplacementIRBuilder Builder(CI, *DL);

  PartwordMaskValues PMV = createMaskInstrs(
      Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
      CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // Shift both the expected and new values into the field's bit position
  // within the aligned word.
  Value *CmpVal_Shifted = Builder.CreateShl(
      Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
      "CmpVal_Shifted");
  Value *NewVal_Shifted = Builder.CreateShl(
      Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
      "NewVal_Shifted");
      Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
      CI->getMergedOrdering());
  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
  Value *Res = PoisonValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
  // Success is determined by comparing only the field bits of the returned
  // old value against the (shifted) expected value.
  Value *Success = Builder.CreateICmpEQ(
      CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
  Res = Builder.CreateInsertValue(Res, Success, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
}
1278
// Emit a load-linked / store-conditional retry loop at the current insert
// point. Returns the loaded (old) value; the builder is left positioned at
// the start of the exit block.
Value *AtomicExpandImpl::insertRMWLLSCLoop(
    IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  assert(AddrAlign >= F->getDataLayout().getTypeStoreSize(ResultTy) &&
         "Expected at least natural alignment at this point.");

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  // atomicrmw.start:
  //     %loaded = @load.linked(%addr)
  //     %new = some_op iN %loaded, %incr
  //     %stored = @store_conditional(%new, %addr)
  //     %try_again = icmp i32 ne %stored, 0
  //     br i1 %try_again, label %loop, label %atomicrmw.end
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);

  // Compute the updated value via the caller-supplied callback.
  Value *NewVal = PerformOp(Builder, Loaded);

  // A nonzero status from the conditional store means it failed and the loop
  // must retry.
  Value *StoreSuccess =
      TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
  Value *TryAgain = Builder.CreateICmpNE(
      StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");

  Instruction *CondBr = Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);

  // Atomic RMW expands to a Load-linked / Store-Conditional loop, because it is
  // hard to predict precise branch weights we mark the branch as "unknown"
  // (50/50) to prevent misleading optimizations.

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return Loaded;
}
1333
1334/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
1335/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
1336/// IR. As a migration step, we convert back to what use to be the standard
1337/// way to represent a pointer cmpxchg so that we can update backends one by
1338/// one.
1339AtomicCmpXchgInst *
1340AtomicExpandImpl::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1341 auto *M = CI->getModule();
1342 Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
1343 M->getDataLayout());
1344
1345 ReplacementIRBuilder Builder(CI, *DL);
1346
1347 Value *Addr = CI->getPointerOperand();
1348
1349 Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
1350 Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
1351
1352 auto *NewCI = Builder.CreateAtomicCmpXchg(
1353 Addr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
1354 CI->getFailureOrdering(), CI->getSyncScopeID());
1355 NewCI->setVolatile(CI->isVolatile());
1356 NewCI->setWeak(CI->isWeak());
1357 LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1358
1359 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1360 Value *Succ = Builder.CreateExtractValue(NewCI, 1);
1361
1362 OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
1363
1364 Value *Res = PoisonValue::get(CI->getType());
1365 Res = Builder.CreateInsertValue(Res, OldVal, 0);
1366 Res = Builder.CreateInsertValue(Res, Succ, 1);
1367
1368 CI->replaceAllUsesWith(Res);
1369 CI->eraseFromParent();
1370 return NewCI;
1371}
1372
// Expand a cmpxchg into an explicit LL/SC control-flow diamond, inserting
// target fences where required. Always replaces and erases CI; returns true.
bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
  AtomicOrdering FailureOrder = CI->getFailureOrdering();
  Value *Addr = CI->getPointerOperand();
  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();
  // If shouldInsertFencesForAtomic() returns true, then the target does not
  // want to deal with memory orders, and emitLeading/TrailingFence should take
  // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
  // should preserve the ordering.
  bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
  AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
                                  ? AtomicOrdering::Monotonic
                                  : CI->getMergedOrdering();

  // In implementations which use a barrier to achieve release semantics, we can
  // delay emitting this barrier until we know a store is actually going to be
  // attempted. The cost of this delay is that we need 2 copies of the block
  // emitting the load-linked, affecting code size.
  //
  // Ideally, this logic would be unconditional except for the minsize check
  // since in other cases the extra blocks naturally collapse down to the
  // minimal loop. Unfortunately, this puts too much stress on later
  // optimisations so we avoid emitting the extra logic in those cases too.
  bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
                           SuccessOrder != AtomicOrdering::Monotonic &&
                           SuccessOrder != AtomicOrdering::Acquire &&
                           !F->hasMinSize();

  // There's no overhead for sinking the release barrier in a weak cmpxchg, so
  // do it even on minsize.
  bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();

  // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
  //
  // The full expansion we produce is:
  //     [...]
  //     %aligned.addr = ...
  // cmpxchg.start:
  //     %unreleasedload = @load.linked(%aligned.addr)
  //     %unreleasedload.extract = extract value from %unreleasedload
  //     %should_store = icmp eq %unreleasedload.extract, %desired
  //     br i1 %should_store, label %cmpxchg.releasingstore,
  //                          label %cmpxchg.nostore
  // cmpxchg.releasingstore:
  //     fence?
  //     br label cmpxchg.trystore
  // cmpxchg.trystore:
  //     %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
  //                            [%releasedload, %cmpxchg.releasedload]
  //     %updated.new = insert %new into %loaded.trystore
  //     %stored = @store_conditional(%updated.new, %aligned.addr)
  //     %success = icmp eq i32 %stored, 0
  //     br i1 %success, label %cmpxchg.success,
  //                     label %cmpxchg.releasedload/%cmpxchg.failure
  // cmpxchg.releasedload:
  //     %releasedload = @load.linked(%aligned.addr)
  //     %releasedload.extract = extract value from %releasedload
  //     %should_store = icmp eq %releasedload.extract, %desired
  //     br i1 %should_store, label %cmpxchg.trystore,
  //                          label %cmpxchg.failure
  // cmpxchg.success:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.nostore:
  //     %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
  //                           [%releasedload,
  //                               %cmpxchg.releasedload/%cmpxchg.trystore]
  //     @load_linked_fail_balance()?
  //     br label %cmpxchg.failure
  // cmpxchg.failure:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.end:
  //     %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
  //                        [%loaded.trystore, %cmpxchg.trystore]
  //     %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
  //     %loaded = extract value from %loaded.exit
  //     %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
  //     %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
  //     [...]
  BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
  auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
  auto ReleasedLoadBB =
      BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
  auto TryStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
  auto ReleasingStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
  auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);

  ReplacementIRBuilder Builder(CI, *DL);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we might want a fence too. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
                       CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
  Builder.CreateBr(StartBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(StartBB);
  Value *UnreleasedLoad =
      TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
  Value *UnreleasedLoadExtract =
      extractMaskedValue(Builder, UnreleasedLoad, PMV);
  Value *ShouldStore = Builder.CreateICmpEQ(
      UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");

  // If the cmpxchg doesn't actually need any ordering when it fails, we can
  // jump straight past that fence instruction (if it exists).
  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB,
                       MDBuilder(F->getContext()).createLikelyBranchWeights());

  Builder.SetInsertPoint(ReleasingStoreBB);
  if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(TryStoreBB);

  Builder.SetInsertPoint(TryStoreBB);
  PHINode *LoadedTryStore =
      Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
  LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
  Value *NewValueInsert =
      insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
  Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
                                                  PMV.AlignedAddr, MemOpOrder);
  StoreSuccess = Builder.CreateICmpEQ(
      StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
  // A weak cmpxchg is allowed to fail, so a failed store-conditional goes
  // straight to the failure block instead of retrying.
  BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
  Builder.CreateCondBr(StoreSuccess, SuccessBB,
                       CI->isWeak() ? FailureBB : RetryBB,
                       MDBuilder(F->getContext()).createLikelyBranchWeights());

  Builder.SetInsertPoint(ReleasedLoadBB);
  Value *SecondLoad;
  if (HasReleasedLoadBB) {
    SecondLoad =
        TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
    Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
    ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
                                       CI->getCompareOperand(), "should_store");

    // If the cmpxchg doesn't actually need any ordering when it fails, we can
    // jump straight past that fence instruction (if it exists).
    Builder.CreateCondBr(
        ShouldStore, TryStoreBB, NoStoreBB,
        MDBuilder(F->getContext()).createLikelyBranchWeights());
    // Update PHI node in TryStoreBB.
    LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
  } else
    Builder.CreateUnreachable();

  // Make sure later instructions don't get reordered with a fence if
  // necessary.
  Builder.SetInsertPoint(SuccessBB);
  if (ShouldInsertFencesForAtomic ||
    TLI->emitTrailingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(ExitBB);

  Builder.SetInsertPoint(NoStoreBB);
  PHINode *LoadedNoStore =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
  LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
  if (HasReleasedLoadBB)
    LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);

  // In the failing case, where we don't execute the store-conditional, the
  // target might want to balance out the load-linked with a dedicated
  // instruction (e.g., on ARM, clearing the exclusive monitor).
  Builder.CreateBr(FailureBB);

  Builder.SetInsertPoint(FailureBB);
  PHINode *LoadedFailure =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
  LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
  if (CI->isWeak())
    LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
  if (ShouldInsertFencesForAtomic)
    TLI->emitTrailingFence(Builder, CI, FailureOrder);
  Builder.CreateBr(ExitBB);

  // Finally, we have control-flow based knowledge of whether the cmpxchg
  // succeeded or not. We expose this to later passes by converting any
  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
  // PHI.
  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  PHINode *LoadedExit =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
  LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
  LoadedExit->addIncoming(LoadedFailure, FailureBB);
  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);

  // This is the "exit value" from the cmpxchg expansion. It may be of
  // a type wider than the one in the cmpxchg instruction.
  Value *LoadedFull = LoadedExit;

  Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
  Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);

  // Look for any users of the cmpxchg that are just comparing the loaded value
  // against the desired one, and replace them with the CFG-derived version.
  for (auto *User : CI->users()) {
    ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
    if (!EV)
      continue;

    assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
           "weird extraction from { iN, i1 }");

    if (EV->getIndices()[0] == 0)
      EV->replaceAllUsesWith(Loaded);
    else

    PrunedInsts.push_back(EV);
  }

  // We can remove the instructions now we're no longer iterating through them.
  for (auto *EV : PrunedInsts)
    EV->eraseFromParent();

  if (!CI->use_empty()) {
    // Some use of the full struct return that we don't understand has happened,
    // so we've got to reconstruct it properly.
    Value *Res;
    Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0);
    Res = Builder.CreateInsertValue(Res, Success, 1);

    CI->replaceAllUsesWith(Res);
  }

  CI->eraseFromParent();
  return true;
}
1622
// Return true if applying this RMW's constant operand can never change the
// stored value, i.e. the operand is the operation's identity element.
bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
  // TODO: Add floating point support.
  auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
  if (!C)
    return false;

  switch (RMWI->getOperation()) {
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
    // x +/- 0, x | 0, x ^ 0 == x.
    return C->isZero();
  case AtomicRMWInst::And:
    // x & ~0 == x.
    return C->isMinusOne();
  case AtomicRMWInst::Min:
    // min(x, INT_MAX) == x.
    return C->isMaxValue(true);
  case AtomicRMWInst::Max:
    // max(x, INT_MIN) == x.
    return C->isMinValue(true);
    return C->isMaxValue(false);
    return C->isMinValue(false);
  default:
    return false;
  }
}
1649
1650bool AtomicExpandImpl::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
1651 if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1652 tryExpandAtomicLoad(ResultingLoad);
1653 return true;
1654 }
1655 return false;
1656}
1657
// Emit a cmpxchg-based retry loop at the current insert point, applying
// PerformOp to the loaded value on each iteration. Returns the final old
// value; the builder is left positioned at the start of the exit block.
Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
    IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder, SyncScope::ID SSID,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
    CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  //     %init_loaded = load atomic iN* %addr
  //     br label %loop
  // loop:
  //     %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
  //     %new = some_op iN %loaded, %incr
  //     %pair = cmpxchg iN* %addr, iN %loaded, iN %new
  //     %new_loaded = extractvalue { iN, i1 } %pair, 0
  //     %success = extractvalue { iN, i1 } %pair, 1
  //     br i1 %success, label %atomicrmw.end, label %loop
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we want a load. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
  // TODO: The initial load must be atomic with the same synchronization scope
  // to avoid a data race with concurrent stores. If the instruction being
  // emulated is volatile, issue a volatile load.
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
  Loaded->addIncoming(InitLoaded, BB);

  Value *NewVal = PerformOp(Builder, Loaded);

  Value *NewLoaded = nullptr;
  Value *Success = nullptr;

  // cmpxchg requires at least monotonic ordering, so weaken Unordered.
  CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
                MemOpOrder == AtomicOrdering::Unordered
                    ? AtomicOrdering::Monotonic
                    : MemOpOrder,
                SSID, Success, NewLoaded, MetadataSrc);
  assert(Success && NewLoaded);

  Loaded->addIncoming(NewLoaded, LoopBB);

  Instruction *CondBr = Builder.CreateCondBr(Success, ExitBB, LoopBB);

  // Atomic RMW expands to a cmpxchg loop, Since precise branch weights
  // cannot be easily determined here, we mark the branch as "unknown" (50/50)
  // to prevent misleading optimizations.

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return NewLoaded;
}
1726
1727bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1728 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1729 unsigned ValueSize = getAtomicOpSize(CI);
1730
1731 switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1732 default:
1733 llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
1734 case TargetLoweringBase::AtomicExpansionKind::None:
1735 if (ValueSize < MinCASSize)
1736 return expandPartwordCmpXchg(CI);
1737 return false;
1738 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
1739 return expandAtomicCmpXchg(CI);
1740 }
1741 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
1742 expandAtomicCmpXchgToMaskedIntrinsic(CI);
1743 return true;
1744 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
1745 return lowerAtomicCmpXchgInst(CI);
1746 case TargetLoweringBase::AtomicExpansionKind::CustomExpand: {
1747 TLI->emitExpandAtomicCmpXchg(CI);
1748 return true;
1749 }
1750 }
1751}
1752
1753bool AtomicExpandImpl::expandAtomicRMWToCmpXchg(
1754 AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg) {
1755 ReplacementIRBuilder Builder(AI, AI->getDataLayout());
1756 Builder.setIsFPConstrained(
1757 AI->getFunction()->hasFnAttribute(Attribute::StrictFP));
1758
1759 // FIXME: If FP exceptions are observable, we should force them off for the
1760 // loop for the FP atomics.
1761 Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
1762 Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
1763 AI->getOrdering(), AI->getSyncScopeID(),
1764 [&](IRBuilderBase &Builder, Value *Loaded) {
1765 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
1766 AI->getValOperand());
1767 },
1768 CreateCmpXchg, /*MetadataSrc=*/AI);
1769
1770 AI->replaceAllUsesWith(Loaded);
1771 AI->eraseFromParent();
1772 return true;
1773}
1774
1775// In order to use one of the sized library calls such as
1776// __atomic_fetch_add_4, the alignment must be sufficient, the size
1777// must be one of the potentially-specialized sizes, and the value
1778// type must actually exist in C on the target (otherwise, the
1779// function wouldn't actually be defined.)
1780static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
1781 const DataLayout &DL) {
1782 // TODO: "LargestSize" is an approximation for "largest type that
1783 // you can express in C". It seems to be the case that int128 is
1784 // supported on all 64-bit platforms, otherwise only up to 64-bit
1785 // integers are supported. If we get this wrong, then we'll try to
1786 // call a sized libcall that doesn't actually exist. There should
1787 // really be some more reliable way in LLVM of determining integer
1788 // sizes which are valid in the target's C ABI...
1789 unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1790 return Alignment >= Size &&
1791 (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1792 Size <= LargestSize;
1793}
1794
1795void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) {
1796 static const RTLIB::Libcall Libcalls[6] = {
1797 RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1798 RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1799 unsigned Size = getAtomicOpSize(I);
1800
1801 bool expanded = expandAtomicOpToLibcall(
1802 I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
1803 I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1804 if (!expanded)
1805 handleFailure(*I, "unsupported atomic load");
1806}
1807
1808void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
1809 static const RTLIB::Libcall Libcalls[6] = {
1810 RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1811 RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1812 unsigned Size = getAtomicOpSize(I);
1813
1814 bool expanded = expandAtomicOpToLibcall(
1815 I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
1816 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1817 if (!expanded)
1818 handleFailure(*I, "unsupported atomic store");
1819}
1820
1821void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
1822 static const RTLIB::Libcall Libcalls[6] = {
1823 RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1824 RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1825 RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1826 unsigned Size = getAtomicOpSize(I);
1827
1828 bool expanded = expandAtomicOpToLibcall(
1829 I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
1830 I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
1831 Libcalls);
1832 if (!expanded)
1833 handleFailure(*I, "unsupported cmpxchg");
1834}
1835
  // One table per RMW operation, indexed by access size: slot 0 is the
  // generic (unsized) libcall, slots 1..5 are the specialized variants
  // for 1, 2, 4, 8, and 16 byte accesses.
  static const RTLIB::Libcall LibcallsXchg[6] = {
      RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
      RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
      RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
  // The fetch_* families have no generic entry point, hence
  // UNKNOWN_LIBCALL in slot 0; callers fall back to a CAS loop then.
  static const RTLIB::Libcall LibcallsAdd[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
      RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
      RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
  static const RTLIB::Libcall LibcallsSub[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
      RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
      RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
  static const RTLIB::Libcall LibcallsAnd[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
      RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
      RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
  static const RTLIB::Libcall LibcallsOr[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
      RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
      RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
  static const RTLIB::Libcall LibcallsXor[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
      RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
      RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
  static const RTLIB::Libcall LibcallsNand[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
      RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
      RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};

  // NOTE(review): several case labels of this switch (BAD_BINOP, Xchg,
  // Nand, and the min/max/FP family after Min) appear to have been lost
  // in extraction, as was the function's signature line — verify against
  // the upstream file before relying on this listing.
  switch (Op) {
    llvm_unreachable("Should not have BAD_BINOP.");
    return ArrayRef(LibcallsXchg);
  case AtomicRMWInst::Add:
    return ArrayRef(LibcallsAdd);
  case AtomicRMWInst::Sub:
    return ArrayRef(LibcallsSub);
  case AtomicRMWInst::And:
    return ArrayRef(LibcallsAnd);
  case AtomicRMWInst::Or:
    return ArrayRef(LibcallsOr);
  case AtomicRMWInst::Xor:
    return ArrayRef(LibcallsXor);
    return ArrayRef(LibcallsNand);
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
    // No atomic libcalls are available for these.
    return {};
  }
  llvm_unreachable("Unexpected AtomicRMW operation.");
}
1904
// Expand an atomicrmw into a call to the __atomic_* support library.
// First tries a direct fetch_op/exchange libcall for the operation; when
// none applies, lowers through a cmpxchg loop whose cmpxchg is itself
// expanded to a __atomic_compare_exchange libcall.
void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
  ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());

  unsigned Size = getAtomicOpSize(I);

  bool Success = false;
  // An empty table means no libcall family exists for this operation.
  if (!Libcalls.empty())
    Success = expandAtomicOpToLibcall(
        I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
        nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);

  // The expansion failed: either there were no libcalls at all for
  // the operation (min/max), or there were only size-specialized
  // libcalls (add/sub/etc) and we needed a generic. So, expand to a
  // CAS libcall, via a CAS loop, instead.
  if (!Success) {
    expandAtomicRMWToCmpXchg(
        I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
                  Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
                  SyncScope::ID SSID, Value *&Success, Value *&NewLoaded,
                  Instruction *MetadataSrc) {
          // Create the CAS instruction normally...
          // NOTE(review): the trailing arguments of this call (failure
          // ordering / sync scope) appear to have been lost in
          // extraction — verify against the upstream file.
          AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
              Addr, Loaded, NewVal, Alignment, MemOpOrder,
          if (MetadataSrc)
            copyMetadataForAtomic(*Pair, *MetadataSrc);

          Success = Builder.CreateExtractValue(Pair, 1, "success");
          NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");

          // ...and then expand the CAS into a libcall.
          expandAtomicCASToLibcall(Pair);
        });
  }
}
1941
// A helper routine for the above expandAtomic*ToLibcall functions.
//
// 'Libcalls' contains an array of enum values for the particular
// ATOMIC libcalls to be emitted. All of the other arguments besides
// 'I' are extracted from the Instruction subclass by the
// caller. Depending on the particular call, some will be null.
//
// Returns true when the instruction was replaced by a libcall; false when
// no suitable libcall exists for this size/operation combination (nothing
// is emitted in that case and the caller must try another expansion).
//
// NOTE(review): a few statements in this listing (the declarations of the
// 'Args'/'ArgTys' vectors, the libcall-name argument of
// getOrInsertFunction, and the replaceAllUsesWith calls near the end)
// appear to have been dropped by the doc extraction — verify against the
// upstream file.
bool AtomicExpandImpl::expandAtomicOpToLibcall(
    Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
    Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
    AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
  // Slot 0 is the generic libcall; slots 1..5 the 1/2/4/8/16-byte variants.
  assert(Libcalls.size() == 6);

  LLVMContext &Ctx = I->getContext();
  Module *M = I->getModule();
  const DataLayout &DL = M->getDataLayout();
  IRBuilder<> Builder(I);
  // Allocas used to pass values indirectly go in the entry block.
  IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());

  bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
  Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);

  if (M->getTargetTriple().isOSWindows() && M->getTargetTriple().isX86_64() &&
      Size == 16) {
    // x86_64 Windows passes i128 as an XMM vector; on return, it is in
    // XMM0, and as a parameter, it is passed indirectly. The generic lowering
    // rules handles this correctly if we pass it as a v2i64 rather than
    // i128. This is what Clang does in the frontend for such types as well
    // (see WinX86_64ABIInfo::classify in Clang).
    SizedIntTy = FixedVectorType::get(Type::getInt64Ty(Ctx), 2);
  }

  const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);

  // TODO: the "order" argument type is "int", not int32. So
  // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
  assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
  Constant *OrderingVal =
      ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
  Constant *Ordering2Val = nullptr;
  if (CASExpected) {
    // cmpxchg carries a second (failure) ordering.
    assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
    Ordering2Val =
        ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
  }
  bool HasResult = I->getType() != Type::getVoidTy(Ctx);

  // Pick the libcall: the sized variant when usable, else the generic one.
  RTLIB::Libcall RTLibType;
  if (UseSizedLibcall) {
    switch (Size) {
    case 1:
      RTLibType = Libcalls[1];
      break;
    case 2:
      RTLibType = Libcalls[2];
      break;
    case 4:
      RTLibType = Libcalls[3];
      break;
    case 8:
      RTLibType = Libcalls[4];
      break;
    case 16:
      RTLibType = Libcalls[5];
      break;
    }
  } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
    RTLibType = Libcalls[0];
  } else {
    // Can't use sized function, and there's no generic for this
    // operation, so give up.
    return false;
  }

  RTLIB::LibcallImpl LibcallImpl = LibcallLowering->getLibcallImpl(RTLibType);
  if (LibcallImpl == RTLIB::Unsupported) {
    // This target does not implement the requested atomic libcall so give up.
    return false;
  }

  // Build up the function call. There's two kinds. First, the sized
  // variants. These calls are going to be one of the following (with
  // N=1,2,4,8,16):
  //  iN    __atomic_load_N(iN *ptr, int ordering)
  //  void  __atomic_store_N(iN *ptr, iN val, int ordering)
  //  iN    __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
  //  bool  __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
  //                                    int success_order, int failure_order)
  //
  // Note that these functions can be used for non-integer atomic
  // operations, the values just need to be bitcast to integers on the
  // way in and out.
  //
  // And, then, the generic variants. They look like the following:
  //  void  __atomic_load(size_t size, void *ptr, void *ret, int ordering)
  //  void  __atomic_store(size_t size, void *ptr, void *val, int ordering)
  //  void  __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
  //                          int ordering)
  //  bool  __atomic_compare_exchange(size_t size, void *ptr, void *expected,
  //                                  void *desired, int success_order,
  //                                  int failure_order)
  //
  // The different signatures are built up depending on the
  // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
  // variables.

  AllocaInst *AllocaCASExpected = nullptr;
  AllocaInst *AllocaValue = nullptr;
  AllocaInst *AllocaResult = nullptr;

  Type *ResultTy;
  AttributeList Attr;

  // 'size' argument.
  if (!UseSizedLibcall) {
    // Note, getIntPtrType is assumed equivalent to size_t.
    Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
  }

  // 'ptr' argument.
  // note: This assumes all address spaces share a common libfunc
  // implementation and that addresses are convertable.  For systems without
  // that property, we'd need to extend this mechanism to support AS-specific
  // families of atomic intrinsics.
  Value *PtrVal = PointerOperand;
  PtrVal = Builder.CreateAddrSpaceCast(PtrVal, PointerType::getUnqual(Ctx));
  Args.push_back(PtrVal);

  // 'expected' argument, if present.
  if (CASExpected) {
    // Passed indirectly: spill to an entry-block alloca.
    AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
    AllocaCASExpected->setAlignment(AllocaAlignment);
    Builder.CreateLifetimeStart(AllocaCASExpected);
    Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
    Args.push_back(AllocaCASExpected);
  }

  // 'val' argument ('desired' for cas), if present.
  if (ValueOperand) {
    if (UseSizedLibcall) {
      // Sized calls take the value directly, bitcast to an iN.
      Value *IntValue =
          Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
      Args.push_back(IntValue);
    } else {
      // Generic calls take the value indirectly through an alloca.
      AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
      AllocaValue->setAlignment(AllocaAlignment);
      Builder.CreateLifetimeStart(AllocaValue);
      Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
      Args.push_back(AllocaValue);
    }
  }

  // 'ret' argument.
  if (!CASExpected && HasResult && !UseSizedLibcall) {
    AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
    AllocaResult->setAlignment(AllocaAlignment);
    Builder.CreateLifetimeStart(AllocaResult);
    Args.push_back(AllocaResult);
  }

  // 'ordering' ('success_order' for cas) argument.
  Args.push_back(OrderingVal);

  // 'failure_order' argument, if present.
  if (Ordering2Val)
    Args.push_back(Ordering2Val);

  // Now, the return type.
  if (CASExpected) {
    ResultTy = Type::getInt1Ty(Ctx);
    Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
  } else if (HasResult && UseSizedLibcall)
    ResultTy = SizedIntTy;
  else
    ResultTy = Type::getVoidTy(Ctx);

  // Done with setting up arguments and return types, create the call:
  for (Value *Arg : Args)
    ArgTys.push_back(Arg->getType());
  FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
  FunctionCallee LibcallFn = M->getOrInsertFunction(
      Attr);
  CallInst *Call = Builder.CreateCall(LibcallFn, Args);
  Call->setAttributes(Attr);
  Value *Result = Call;

  // And then, extract the results...
  if (ValueOperand && !UseSizedLibcall)
    Builder.CreateLifetimeEnd(AllocaValue);

  if (CASExpected) {
    // The final result from the CAS is {load of 'expected' alloca, bool result
    // from call}
    Type *FinalResultTy = I->getType();
    Value *V = PoisonValue::get(FinalResultTy);
    Value *ExpectedOut = Builder.CreateAlignedLoad(
        CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
    Builder.CreateLifetimeEnd(AllocaCASExpected);
    V = Builder.CreateInsertValue(V, ExpectedOut, 0);
    V = Builder.CreateInsertValue(V, Result, 1);
  } else if (HasResult) {
    Value *V;
    if (UseSizedLibcall)
      // Sized calls return the value directly; cast back to the IR type.
      V = Builder.CreateBitOrPointerCast(Result, I->getType());
    else {
      // Generic calls wrote the result through the 'ret' alloca.
      V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
                                    AllocaAlignment);
      Builder.CreateLifetimeEnd(AllocaResult);
    }
  }
  I->eraseFromParent();
  return true;
}
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static Value * performMaskedAtomicOp(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, Value *Loaded, Value *Shifted_Inc, Value *Inc, const PartwordMaskValues &PMV)
Emit IR to implement a masked version of a given atomicrmw operation.
static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder, Instruction *I, Type *ValueType, Value *Addr, Align AddrAlign, unsigned MinWordSize)
This is a helper function which builds instructions to provide values necessary for partword atomic o...
static bool canUseSizedAtomicCall(unsigned Size, Align Alignment, const DataLayout &DL)
static Value * extractMaskedValue(IRBuilderBase &Builder, Value *WideWord, const PartwordMaskValues &PMV)
static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr, Value *Loaded, Value *NewVal, Align AddrAlign, AtomicOrdering MemOpOrder, SyncScope::ID SSID, Value *&Success, Value *&NewLoaded, Instruction *MetadataSrc)
Expand Atomic static false unsigned getAtomicOpSize(LoadInst *LI)
static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I)
static Value * insertMaskedValue(IRBuilderBase &Builder, Value *WideWord, Value *Updated, const PartwordMaskValues &PMV)
static void copyMetadataForAtomic(Instruction &Dest, const Instruction &Source)
Copy metadata that's safe to preserve when widening atomics.
static ArrayRef< RTLIB::Libcall > GetRMWLibcall(AtomicRMWInst::BinOp Op)
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool runOnFunction(Function &F, bool PostInlining)
#define DEBUG_TYPE
Module.h This file contains the declarations for the Module class.
static bool isIdempotentRMW(AtomicRMWInst &RMWI)
Return true if and only if the given instruction does not modify the memory location referenced.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
#define T
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file contains the declarations for profiling metadata utility functions.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
void setAlignment(Align Align)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getMergedOrdering() const
Returns a single ordering which is at least as strong as both the success and failure orderings for t...
void setWeak(bool IsWeak)
bool isVolatile() const
Return true if this is a cmpxchg from a volatile memory location.
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
bool isWeak() const
Return true if this cmpxchg may spuriously fail.
void setVolatile(bool V)
Specify whether this is a volatile cmpxchg.
AtomicOrdering getSuccessOrdering() const
Returns the success ordering constraint of this cmpxchg instruction.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this cmpxchg instruction.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
bool isVolatile() const
Return true if this is a RMW on a volatile memory location.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ FAdd
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ FSub
*p = old - v
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMaximumNum
*p = maximumnum(old, v) maximumnum matches the behavior of llvm.maximumnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
@ FMinimumNum
*p = minimumnum(old, v) minimumnum matches the behavior of llvm.minimumnum.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
static LLVM_ABI StringRef getOperationName(BinOp Op)
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
iterator end()
Definition BasicBlock.h:462
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:449
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
reverse_iterator rbegin()
Definition BasicBlock.h:465
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
InstListType::reverse_iterator reverse_iterator
Definition BasicBlock.h:172
reverse_iterator rend()
Definition BasicBlock.h:467
void setAttributes(AttributeList A)
Set the attributes for this call.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:135
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
ArrayRef< unsigned > getIndices() const
unsigned getNumIndices() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:873
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
BasicBlockListType::iterator iterator
Definition Function.h:70
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
AtomicCmpXchgInst * CreateAtomicCmpXchg(Value *Ptr, Value *Cmp, Value *New, MaybeAlign Align, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, SyncScope::ID SSID=SyncScope::System)
Definition IRBuilder.h:1928
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2638
LLVM_ABI CallInst * CreateLifetimeStart(Value *Ptr)
Create a lifetime.start intrinsic.
LLVM_ABI CallInst * CreateLifetimeEnd(Value *Ptr)
Create a lifetime.end intrinsic.
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition IRBuilder.h:1894
CondBrInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1223
UnreachableInst * CreateUnreachable()
Definition IRBuilder.h:1365
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2631
BasicBlock::iterator GetInsertPoint() const
Definition IRBuilder.h:202
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2194
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Definition IRBuilder.h:2233
BasicBlock * GetInsertBlock() const
Definition IRBuilder.h:201
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2335
UncondBrInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1217
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2281
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition IRBuilder.h:2496
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2331
void setIsFPConstrained(bool IsCon)
Enable/Disable use of constrained floating point math.
Definition IRBuilder.h:351
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition IRBuilder.h:1877
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1518
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition IRBuilder.h:2077
LLVMContext & getContext() const
Definition IRBuilder.h:203
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:1577
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2189
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2510
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition IRBuilder.h:1941
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition IRBuilder.h:1913
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1599
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2204
Provides an 'InsertHelper' that calls a user-provided callback after performing the default insertion...
Definition IRBuilder.h:75
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2811
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:354
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
LLVM_ABI void getSyncScopeNames(SmallVectorImpl< StringRef > &SSNs) const
getSyncScopeNames - Populates client supplied SmallVector with synchronization scope names registered...
Tracks which library functions to use for a particular subtarget.
LLVM_ABI RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Return the lowering's selection of implementation call for Call.
Record a mapping from subtarget to LibcallLoweringInfo.
const LibcallLoweringInfo & getLibcallLowering(const TargetSubtargetInfo &Subtarget) const
An instruction for reading from memory.
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
void setVolatile(bool V)
Specify whether this is a volatile load or not.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Metadata node.
Definition Metadata.h:1080
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
LLVMContext & getContext() const
Get the global data context.
Definition Module.h:285
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition Pass.cpp:112
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setVolatile(bool V)
Specify whether this is a volatile store or not.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
virtual Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const
Perform a store-conditional operation to Addr.
EVT getMemValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
virtual void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const
Perform a bit test atomicrmw using a target-specific intrinsic.
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
virtual bool shouldInsertFencesForAtomic(const Instruction *I) const
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
virtual AtomicOrdering atomicOperationOrderAfterFenceSplit(const Instruction *I) const
virtual void emitExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) const
Perform a cmpxchg expansion using a target-specific method.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
virtual Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const
Perform a masked atomicrmw using a target-specific intrinsic.
virtual AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *AI) const
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
virtual Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
virtual void emitExpandAtomicRMW(AtomicRMWInst *AI) const
Perform an atomicrmw expansion in a target-specific way.
virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const
virtual void emitExpandAtomicStore(StoreInst *SI) const
Perform an atomic store in a target-specific way.
virtual AtomicExpansionKind shouldCastAtomicRMWIInIR(AtomicRMWInst *RMWI) const
Returns how the given atomicrmw should be cast by the IR-level AtomicExpand pass.
virtual bool shouldInsertTrailingSeqCstFenceForAtomicStore(const Instruction *I) const
Whether AtomicExpandPass should automatically insert a seq_cst trailing fence without reducing the or...
virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass.
virtual Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const
Perform a masked cmpxchg using a target-specific intrinsic.
unsigned getMaxAtomicSizeInBitsSupported() const
Returns the maximum atomic operation size (in bits) supported by the backend.
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
virtual void emitExpandAtomicLoad(LoadInst *LI) const
Perform an atomic load in a target-specific way.
virtual AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const
Returns how the given (atomic) store should be expanded by the IR-level AtomicExpand pass.
virtual void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const
Perform an atomicrmw whose result is only used by comparison, using a target-specific intrinsic.
virtual AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const
Returns how the given (atomic) store should be cast by the IR-level AtomicExpand pass.
virtual Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const
virtual AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const
Returns how the given (atomic) load should be cast by the IR-level AtomicExpand pass.
virtual Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const
Inserts in the IR a target-specific intrinsic specifying a fence.
virtual LoadInst * lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const
On some platforms, an AtomicRMW that never actually modifies the value (such as fetch_add of 0) can b...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Target-Independent Code Generator Pass Configuration Options.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:290
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:284
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:201
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:186
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:317
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:141
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:259
iterator_range< user_iterator > users()
Definition Value.h:427
bool use_empty() const
Definition Value.h:347
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI bool canInstructionHaveMMRAs(const Instruction &I)
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
bool isReleaseOrStronger(AtomicOrdering AO)
AtomicOrderingCABI toCABI(AtomicOrdering AO)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
Value * buildAtomicRMWValue(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, Value *Loaded, Value *Val)
Emit IR to implement the given atomicrmw operation on values in registers, returning the new value.
AtomicOrdering
Atomic ordering for LLVM's memory model.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
ArrayRef(const T &OneElt) -> ArrayRef< T >
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
bool lowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI)
Convert the given Cmpxchg into primitive load and compare.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool lowerAtomicRMWInst(AtomicRMWInst *RMWI)
Convert the given RMWI into primitive load and stores, assuming that doing so is legal.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI FunctionPass * createAtomicExpandLegacyPass()
AtomicExpandPass - At IR level this pass replaces atomic instructions with __atomic_* library calls,...
LLVM_ABI char & AtomicExpandID
AtomicExpandID – Lowers atomic operations in terms of either cmpxchg load-linked/store-conditional lo...
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition ValueTypes.h:420
Matching combinators.
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.