LLVM 23.0.0git
AtomicExpandPass.cpp
Go to the documentation of this file.
1//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass (at IR level) to replace atomic instructions with
10// __atomic_* library calls, or target specific instruction which implement the
11// same semantics in a way which better fits the target backend. This can
12// include the use of (intrinsic-based) load-linked/store-conditional loops,
13// AtomicCmpXchg, or type coercions.
14//
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/ArrayRef.h"
28#include "llvm/IR/Attributes.h"
29#include "llvm/IR/BasicBlock.h"
30#include "llvm/IR/Constant.h"
31#include "llvm/IR/Constants.h"
32#include "llvm/IR/DataLayout.h"
34#include "llvm/IR/Function.h"
35#include "llvm/IR/IRBuilder.h"
36#include "llvm/IR/Instruction.h"
38#include "llvm/IR/MDBuilder.h"
40#include "llvm/IR/Module.h"
42#include "llvm/IR/Type.h"
43#include "llvm/IR/User.h"
44#include "llvm/IR/Value.h"
46#include "llvm/Pass.h"
49#include "llvm/Support/Debug.h"
54#include <cassert>
55#include <cstdint>
56#include <iterator>
57
58using namespace llvm;
59
60#define DEBUG_TYPE "atomic-expand"
61
62namespace {
63
64class AtomicExpandImpl {
65 const TargetLowering *TLI = nullptr;
66 const LibcallLoweringInfo *LibcallLowering = nullptr;
67 const DataLayout *DL = nullptr;
68
69private:
70 /// Callback type for emitting a cmpxchg instruction during RMW expansion.
71 /// Parameters: (Builder, Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
72 /// SSID, IsVolatile, /* OUT */ Success, /* OUT */ NewLoaded,
73 /// MetadataSrc)
74 using CreateCmpXchgInstFun = function_ref<void(
76 SyncScope::ID, bool, Value *&, Value *&, Instruction *)>;
77
78 void handleFailure(Instruction &FailedInst, const Twine &Msg,
79 Instruction *DiagnosticInst = nullptr) const {
80 LLVMContext &Ctx = FailedInst.getContext();
81
82 // TODO: Do not use generic error type.
83 Ctx.emitError(DiagnosticInst ? DiagnosticInst : &FailedInst, Msg);
84
85 if (!FailedInst.getType()->isVoidTy())
86 FailedInst.replaceAllUsesWith(PoisonValue::get(FailedInst.getType()));
87 FailedInst.eraseFromParent();
88 }
89
90 template <typename Inst>
91 void handleUnsupportedAtomicSize(Inst *I, const Twine &AtomicOpName,
92 Instruction *DiagnosticInst = nullptr) const;
93
94 bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
95 bool tryInsertTrailingSeqCstFence(Instruction *AtomicI);
96 template <typename AtomicInst>
97 bool tryInsertFencesForAtomic(AtomicInst *AtomicI, bool OrderingRequiresFence,
98 AtomicOrdering NewOrdering);
99 IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
100 LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
101 bool tryExpandAtomicLoad(LoadInst *LI);
102 bool expandAtomicLoadToLL(LoadInst *LI);
103 bool expandAtomicLoadToCmpXchg(LoadInst *LI);
104 StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
105 bool tryExpandAtomicStore(StoreInst *SI);
106 void expandAtomicStoreToXChg(StoreInst *SI);
107 bool tryExpandAtomicRMW(AtomicRMWInst *AI);
108 AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
109 Value *
110 insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
111 Align AddrAlign, AtomicOrdering MemOpOrder,
112 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
113 void expandAtomicOpToLLSC(
114 Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
115 AtomicOrdering MemOpOrder,
116 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
117 void expandPartwordAtomicRMW(
119 AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
120 bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
121 void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
122 void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);
123
124 AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
125 Value *insertRMWCmpXchgLoop(
126 IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
127 AtomicOrdering MemOpOrder, SyncScope::ID SSID, bool IsVolatile,
128 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
129 CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc);
130 bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
131
132 bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
133 bool isIdempotentRMW(AtomicRMWInst *RMWI);
134 bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
135
136 bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
137 Value *PointerOperand, Value *ValueOperand,
138 Value *CASExpected, AtomicOrdering Ordering,
139 AtomicOrdering Ordering2,
140 ArrayRef<RTLIB::Libcall> Libcalls);
141 void expandAtomicLoadToLibcall(LoadInst *LI);
142 void expandAtomicStoreToLibcall(StoreInst *LI);
143 void expandAtomicRMWToLibcall(AtomicRMWInst *I);
144 void expandAtomicCASToLibcall(AtomicCmpXchgInst *I,
145 const Twine &AtomicOpName = "cmpxchg",
146 Instruction *DiagnosticInst = nullptr);
147
148 bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
149 CreateCmpXchgInstFun CreateCmpXchg);
150
151 bool processAtomicInstr(Instruction *I);
152
153public:
154 bool run(Function &F,
155 const LibcallLoweringModuleAnalysisResult &LibcallResult,
156 const TargetMachine *TM);
157};
158
159class AtomicExpandLegacy : public FunctionPass {
160public:
161 static char ID; // Pass identification, replacement for typeid
162
163 AtomicExpandLegacy() : FunctionPass(ID) {}
164
165 void getAnalysisUsage(AnalysisUsage &AU) const override {
168 }
169
170 bool runOnFunction(Function &F) override;
171};
172
173// IRBuilder to be used for replacement atomic instructions.
174struct ReplacementIRBuilder
175 : IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> {
176 MDNode *MMRAMD = nullptr;
177
178 // Preserves the DebugLoc from I, and preserves still valid metadata.
179 // Enable StrictFP builder mode when appropriate.
180 explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
181 : IRBuilder(I->getContext(), InstSimplifyFolder(DL),
183 [this](Instruction *I) { addMMRAMD(I); })) {
184 SetInsertPoint(I);
185 this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
186 if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
187 this->setIsFPConstrained(true);
188
189 MMRAMD = I->getMetadata(LLVMContext::MD_mmra);
190 }
191
192 void addMMRAMD(Instruction *I) {
194 I->setMetadata(LLVMContext::MD_mmra, MMRAMD);
195 }
196};
197
198} // end anonymous namespace
199
200char AtomicExpandLegacy::ID = 0;
201
202char &llvm::AtomicExpandID = AtomicExpandLegacy::ID;
203
205 "Expand Atomic instructions", false, false)
208INITIALIZE_PASS_END(AtomicExpandLegacy, DEBUG_TYPE,
209 "Expand Atomic instructions", false, false)
210
211// Helper functions to retrieve the size of atomic instructions.
212static unsigned getAtomicOpSize(LoadInst *LI) {
213 const DataLayout &DL = LI->getDataLayout();
214 return DL.getTypeStoreSize(LI->getType());
215}
216
217static unsigned getAtomicOpSize(StoreInst *SI) {
218 const DataLayout &DL = SI->getDataLayout();
219 return DL.getTypeStoreSize(SI->getValueOperand()->getType());
220}
221
222static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
223 const DataLayout &DL = RMWI->getDataLayout();
224 return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
225}
226
227static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
228 const DataLayout &DL = CASI->getDataLayout();
229 return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
230}
231
232/// Copy metadata that's safe to preserve when widening atomics.
234 const Instruction &Source) {
236 Source.getAllMetadata(MD);
237 LLVMContext &Ctx = Dest.getContext();
238 MDBuilder MDB(Ctx);
239
240 for (auto [ID, N] : MD) {
241 switch (ID) {
242 case LLVMContext::MD_dbg:
243 case LLVMContext::MD_tbaa:
244 case LLVMContext::MD_tbaa_struct:
245 case LLVMContext::MD_alias_scope:
246 case LLVMContext::MD_noalias:
247 case LLVMContext::MD_noalias_addrspace:
248 case LLVMContext::MD_access_group:
249 case LLVMContext::MD_mmra:
250 Dest.setMetadata(ID, N);
251 break;
252 default:
253 if (ID == Ctx.getMDKindID("amdgpu.no.remote.memory"))
254 Dest.setMetadata(ID, N);
255 else if (ID == Ctx.getMDKindID("amdgpu.no.fine.grained.memory"))
256 Dest.setMetadata(ID, N);
257
258 // Losing amdgpu.ignore.denormal.mode, but it doesn't matter for current
259 // uses.
260 break;
261 }
262 }
263}
264
265template <typename Inst>
266static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
267 unsigned Size = getAtomicOpSize(I);
268 Align Alignment = I->getAlign();
269 unsigned MaxSize = TLI->getMaxAtomicSizeInBitsSupported() / 8;
270 return Alignment >= Size && Size <= MaxSize;
271}
272
273template <typename Inst>
275 raw_ostream &OS) {
276 unsigned Size = getAtomicOpSize(I);
277 Align Alignment = I->getAlign();
278 bool NeedSeparator = false;
279
280 if (Alignment < Size) {
281 OS << "instruction alignment " << Alignment.value()
282 << " is smaller than the required " << Size
283 << "-byte alignment for this atomic operation";
284 NeedSeparator = true;
285 }
286
287 unsigned MaxSize = TLI->getMaxAtomicSizeInBitsSupported() / 8;
288 if (Size > MaxSize) {
289 if (NeedSeparator)
290 OS << "; ";
291 OS << "target supports atomics up to " << MaxSize
292 << " bytes, but this atomic accesses " << Size << " bytes";
293 }
294}
295
296template <typename Inst>
297void AtomicExpandImpl::handleUnsupportedAtomicSize(
298 Inst *I, const Twine &AtomicOpName, Instruction *DiagnosticInst) const {
299 assert(!atomicSizeSupported(TLI, I) && "expected unsupported atomic size");
300 SmallString<128> FailureReason;
301 raw_svector_ostream OS(FailureReason);
303 handleFailure(*I, Twine("unsupported ") + AtomicOpName + ": " + FailureReason,
304 DiagnosticInst);
305}
306
307bool AtomicExpandImpl::tryInsertTrailingSeqCstFence(Instruction *AtomicI) {
309 return false;
310
311 IRBuilder Builder(AtomicI);
312 if (auto *TrailingFence = TLI->emitTrailingFence(
313 Builder, AtomicI, AtomicOrdering::SequentiallyConsistent)) {
314 TrailingFence->moveAfter(AtomicI);
315 return true;
316 }
317 return false;
318}
319
320template <typename AtomicInst>
321bool AtomicExpandImpl::tryInsertFencesForAtomic(AtomicInst *AtomicI,
322 bool OrderingRequiresFence,
323 AtomicOrdering NewOrdering) {
324 bool ShouldInsertFences = TLI->shouldInsertFencesForAtomic(AtomicI);
325 if (OrderingRequiresFence && ShouldInsertFences) {
326 AtomicOrdering FenceOrdering = AtomicI->getOrdering();
327 AtomicI->setOrdering(NewOrdering);
328 return bracketInstWithFences(AtomicI, FenceOrdering);
329 }
330 if (!ShouldInsertFences)
331 return tryInsertTrailingSeqCstFence(AtomicI);
332 return false;
333}
334
335bool AtomicExpandImpl::processAtomicInstr(Instruction *I) {
336 if (auto *LI = dyn_cast<LoadInst>(I)) {
337 if (!LI->isAtomic())
338 return false;
339
340 if (!atomicSizeSupported(TLI, LI)) {
341 expandAtomicLoadToLibcall(LI);
342 return true;
343 }
344
345 bool MadeChange = false;
346 if (TLI->shouldCastAtomicLoadInIR(LI) ==
347 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
348 LI = convertAtomicLoadToIntegerType(LI);
349 MadeChange = true;
350 }
351
352 MadeChange |= tryInsertFencesForAtomic(
353 LI, isAcquireOrStronger(LI->getOrdering()), AtomicOrdering::Monotonic);
354
355 MadeChange |= tryExpandAtomicLoad(LI);
356 return MadeChange;
357 }
358
359 if (auto *SI = dyn_cast<StoreInst>(I)) {
360 if (!SI->isAtomic())
361 return false;
362
363 if (!atomicSizeSupported(TLI, SI)) {
364 expandAtomicStoreToLibcall(SI);
365 return true;
366 }
367
368 bool MadeChange = false;
369 if (TLI->shouldCastAtomicStoreInIR(SI) ==
370 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
371 SI = convertAtomicStoreToIntegerType(SI);
372 MadeChange = true;
373 }
374
375 MadeChange |= tryInsertFencesForAtomic(
376 SI, isReleaseOrStronger(SI->getOrdering()), AtomicOrdering::Monotonic);
377
378 MadeChange |= tryExpandAtomicStore(SI);
379 return MadeChange;
380 }
381
382 if (auto *RMWI = dyn_cast<AtomicRMWInst>(I)) {
383 if (!atomicSizeSupported(TLI, RMWI)) {
384 expandAtomicRMWToLibcall(RMWI);
385 return true;
386 }
387
388 bool MadeChange = false;
389 if (TLI->shouldCastAtomicRMWIInIR(RMWI) ==
390 TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
391 RMWI = convertAtomicXchgToIntegerType(RMWI);
392 MadeChange = true;
393 }
394
395 MadeChange |= tryInsertFencesForAtomic(
396 RMWI,
397 isReleaseOrStronger(RMWI->getOrdering()) ||
398 isAcquireOrStronger(RMWI->getOrdering()),
400
401 // There are two different ways of expanding RMW instructions:
402 // - into a load if it is idempotent
403 // - into a Cmpxchg/LL-SC loop otherwise
404 // we try them in that order.
405 MadeChange |= (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) ||
406 tryExpandAtomicRMW(RMWI);
407 return MadeChange;
408 }
409
410 if (auto *CASI = dyn_cast<AtomicCmpXchgInst>(I)) {
411 if (!atomicSizeSupported(TLI, CASI)) {
412 expandAtomicCASToLibcall(CASI);
413 return true;
414 }
415
416 // TODO: when we're ready to make the change at the IR level, we can
417 // extend convertCmpXchgToInteger for floating point too.
418 bool MadeChange = false;
419 if (CASI->getCompareOperand()->getType()->isPointerTy()) {
420 // TODO: add a TLI hook to control this so that each target can
421 // convert to lowering the original type one at a time.
422 CASI = convertCmpXchgToIntegerType(CASI);
423 MadeChange = true;
424 }
425
426 auto CmpXchgExpansion = TLI->shouldExpandAtomicCmpXchgInIR(CASI);
427 if (TLI->shouldInsertFencesForAtomic(CASI)) {
428 if (CmpXchgExpansion == TargetLoweringBase::AtomicExpansionKind::None &&
429 (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
430 isAcquireOrStronger(CASI->getSuccessOrdering()) ||
431 isAcquireOrStronger(CASI->getFailureOrdering()))) {
432 // If a compare and swap is lowered to LL/SC, we can do smarter fence
433 // insertion, with a stronger one on the success path than on the
434 // failure path. As a result, fence insertion is directly done by
435 // expandAtomicCmpXchg in that case.
436 AtomicOrdering FenceOrdering = CASI->getMergedOrdering();
437 AtomicOrdering CASOrdering =
439 CASI->setSuccessOrdering(CASOrdering);
440 CASI->setFailureOrdering(CASOrdering);
441 MadeChange |= bracketInstWithFences(CASI, FenceOrdering);
442 }
443 } else if (CmpXchgExpansion !=
444 TargetLoweringBase::AtomicExpansionKind::LLSC) {
445 // CmpXchg LLSC is handled in expandAtomicCmpXchg().
446 MadeChange |= tryInsertTrailingSeqCstFence(CASI);
447 }
448
449 MadeChange |= tryExpandAtomicCmpXchg(CASI);
450 return MadeChange;
451 }
452
453 return false;
454}
455
456bool AtomicExpandImpl::run(
457 Function &F, const LibcallLoweringModuleAnalysisResult &LibcallResult,
458 const TargetMachine *TM) {
459 const auto *Subtarget = TM->getSubtargetImpl(F);
460 if (!Subtarget->enableAtomicExpand())
461 return false;
462 TLI = Subtarget->getTargetLowering();
463 LibcallLowering = &LibcallResult.getLibcallLowering(*Subtarget);
464 DL = &F.getDataLayout();
465
466 bool MadeChange = false;
467
468 for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
469 BasicBlock *BB = &*BBI;
470
472
473 for (BasicBlock::reverse_iterator I = BB->rbegin(), E = BB->rend(); I != E;
474 I = Next) {
475 Instruction &Inst = *I;
476 Next = std::next(I);
477
478 if (processAtomicInstr(&Inst)) {
479 MadeChange = true;
480
481 // New blocks may have been inserted.
482 BBE = F.end();
483 }
484 }
485 }
486
487 return MadeChange;
488}
489
490bool AtomicExpandLegacy::runOnFunction(Function &F) {
491
492 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
493 if (!TPC)
494 return false;
495 auto *TM = &TPC->getTM<TargetMachine>();
496
497 const LibcallLoweringModuleAnalysisResult &LibcallResult =
498 getAnalysis<LibcallLoweringInfoWrapper>().getResult(*F.getParent());
499 AtomicExpandImpl AE;
500 return AE.run(F, LibcallResult, TM);
501}
502
504 return new AtomicExpandLegacy();
505}
506
509 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
510
511 const LibcallLoweringModuleAnalysisResult *LibcallResult =
512 MAMProxy.getCachedResult<LibcallLoweringModuleAnalysis>(*F.getParent());
513
514 if (!LibcallResult) {
515 F.getContext().emitError("'" + LibcallLoweringModuleAnalysis::name() +
516 "' analysis required");
517 return PreservedAnalyses::all();
518 }
519
520 AtomicExpandImpl AE;
521
522 bool Changed = AE.run(F, *LibcallResult, TM);
523 if (!Changed)
524 return PreservedAnalyses::all();
525
527}
528
529bool AtomicExpandImpl::bracketInstWithFences(Instruction *I,
530 AtomicOrdering Order) {
531 ReplacementIRBuilder Builder(I, *DL);
532
533 auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
534
535 auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
536 // We have a guard here because not every atomic operation generates a
537 // trailing fence.
538 if (TrailingFence)
539 TrailingFence->moveAfter(I);
540
541 return (LeadingFence || TrailingFence);
542}
543
544/// Get the iX type with the same bitwidth as T.
546AtomicExpandImpl::getCorrespondingIntegerType(Type *T, const DataLayout &DL) {
547 EVT VT = TLI->getMemValueType(DL, T);
548 unsigned BitWidth = VT.getStoreSizeInBits();
549 assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
550 return IntegerType::get(T->getContext(), BitWidth);
551}
552
553/// Convert an atomic load of a non-integral type to an integer load of the
554/// equivalent bitwidth. See the function comment on
555/// convertAtomicStoreToIntegerType for background.
556LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) {
557 auto *M = LI->getModule();
558 Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());
559
560 ReplacementIRBuilder Builder(LI, *DL);
561
562 Value *Addr = LI->getPointerOperand();
563
564 auto *NewLI = Builder.CreateLoad(NewTy, Addr);
565 NewLI->setAlignment(LI->getAlign());
566 NewLI->setVolatile(LI->isVolatile());
567 NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
568 LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
569
570 Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
571 LI->replaceAllUsesWith(NewVal);
572 LI->eraseFromParent();
573 return NewLI;
574}
575
576AtomicRMWInst *
577AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
579
580 auto *M = RMWI->getModule();
581 Type *NewTy =
582 getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());
583
584 ReplacementIRBuilder Builder(RMWI, *DL);
585
586 Value *Addr = RMWI->getPointerOperand();
587 Value *Val = RMWI->getValOperand();
588 Value *NewVal = Val->getType()->isPointerTy()
589 ? Builder.CreatePtrToInt(Val, NewTy)
590 : Builder.CreateBitCast(Val, NewTy);
591
592 auto *NewRMWI = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Addr, NewVal,
593 RMWI->getAlign(), RMWI->getOrdering(),
594 RMWI->getSyncScopeID());
595 NewRMWI->setVolatile(RMWI->isVolatile());
596 copyMetadataForAtomic(*NewRMWI, *RMWI);
597 LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");
598
599 Value *NewRVal = RMWI->getType()->isPointerTy()
600 ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
601 : Builder.CreateBitCast(NewRMWI, RMWI->getType());
602 RMWI->replaceAllUsesWith(NewRVal);
603 RMWI->eraseFromParent();
604 return NewRMWI;
605}
606
607bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
608 switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
609 case TargetLoweringBase::AtomicExpansionKind::None:
610 return false;
611 case TargetLoweringBase::AtomicExpansionKind::LLSC:
612 expandAtomicOpToLLSC(
613 LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
614 LI->getOrdering(),
615 [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
616 return true;
617 case TargetLoweringBase::AtomicExpansionKind::LLOnly:
618 return expandAtomicLoadToLL(LI);
619 case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
620 return expandAtomicLoadToCmpXchg(LI);
621 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
622 LI->setAtomic(AtomicOrdering::NotAtomic);
623 return true;
624 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
625 TLI->emitExpandAtomicLoad(LI);
626 return true;
627 default:
628 llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
629 }
630}
631
632bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
633 switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
634 case TargetLoweringBase::AtomicExpansionKind::None:
635 return false;
636 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
637 TLI->emitExpandAtomicStore(SI);
638 return true;
639 case TargetLoweringBase::AtomicExpansionKind::Expand:
640 expandAtomicStoreToXChg(SI);
641 return true;
642 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
643 SI->setAtomic(AtomicOrdering::NotAtomic);
644 return true;
645 default:
646 llvm_unreachable("Unhandled case in tryExpandAtomicStore");
647 }
648}
649
650bool AtomicExpandImpl::expandAtomicLoadToLL(LoadInst *LI) {
651 ReplacementIRBuilder Builder(LI, *DL);
652
653 // On some architectures, load-linked instructions are atomic for larger
654 // sizes than normal loads. For example, the only 64-bit load guaranteed
655 // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
656 Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
657 LI->getPointerOperand(), LI->getOrdering());
659
660 LI->replaceAllUsesWith(Val);
661 LI->eraseFromParent();
662
663 return true;
664}
665
666bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
667 ReplacementIRBuilder Builder(LI, *DL);
668 AtomicOrdering Order = LI->getOrdering();
669 if (Order == AtomicOrdering::Unordered)
670 Order = AtomicOrdering::Monotonic;
671
672 Value *Addr = LI->getPointerOperand();
673 Type *Ty = LI->getType();
674 Constant *DummyVal = Constant::getNullValue(Ty);
675
676 Value *Pair = Builder.CreateAtomicCmpXchg(
677 Addr, DummyVal, DummyVal, LI->getAlign(), Order,
679 Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
680
681 LI->replaceAllUsesWith(Loaded);
682 LI->eraseFromParent();
683
684 return true;
685}
686
687/// Convert an atomic store of a non-integral type to an integer store of the
688/// equivalent bitwidth. We used to not support floating point or vector
689/// atomics in the IR at all. The backends learned to deal with the bitcast
690/// idiom because that was the only way of expressing the notion of a atomic
691/// float or vector store. The long term plan is to teach each backend to
692/// instruction select from the original atomic store, but as a migration
693/// mechanism, we convert back to the old format which the backends understand.
694/// Each backend will need individual work to recognize the new format.
695StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
696 ReplacementIRBuilder Builder(SI, *DL);
697 auto *M = SI->getModule();
698 Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
699 M->getDataLayout());
700 Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
701
702 Value *Addr = SI->getPointerOperand();
703
704 StoreInst *NewSI = Builder.CreateStore(NewVal, Addr);
705 NewSI->setAlignment(SI->getAlign());
706 NewSI->setVolatile(SI->isVolatile());
707 NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
708 LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
709 SI->eraseFromParent();
710 return NewSI;
711}
712
713void AtomicExpandImpl::expandAtomicStoreToXChg(StoreInst *SI) {
714 // This function is only called on atomic stores that are too large to be
715 // atomic if implemented as a native store. So we replace them by an
716 // atomic swap, that can be implemented for example as a ldrex/strex on ARM
717 // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
718 // It is the responsibility of the target to only signal expansion via
719 // shouldExpandAtomicRMW in cases where this is required and possible.
720 ReplacementIRBuilder Builder(SI, *DL);
721 AtomicOrdering Ordering = SI->getOrdering();
722 assert(Ordering != AtomicOrdering::NotAtomic);
723 AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
724 ? AtomicOrdering::Monotonic
725 : Ordering;
726 AtomicRMWInst *AI = Builder.CreateAtomicRMW(
727 AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
728 SI->getAlign(), RMWOrdering);
729 SI->eraseFromParent();
730
731 // Now we have an appropriate swap instruction, lower it as usual.
732 tryExpandAtomicRMW(AI);
733}
734
735static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
736 Value *Loaded, Value *NewVal, Align AddrAlign,
737 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
738 bool IsVolatile, Value *&Success,
739 Value *&NewLoaded, Instruction *MetadataSrc) {
740 Type *OrigTy = NewVal->getType();
741
742 // This code can go away when cmpxchg supports FP and vector types.
743 assert(!OrigTy->isPointerTy());
744 bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isVectorTy();
745 if (NeedBitcast) {
746 IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
747 NewVal = Builder.CreateBitCast(NewVal, IntTy);
748 Loaded = Builder.CreateBitCast(Loaded, IntTy);
749 }
750
751 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
752 Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
754 Pair->setVolatile(IsVolatile);
755 if (MetadataSrc)
756 copyMetadataForAtomic(*Pair, *MetadataSrc);
757
758 Success = Builder.CreateExtractValue(Pair, 1, "success");
759 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
760
761 if (NeedBitcast)
762 NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
763}
764
765bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
766 LLVMContext &Ctx = AI->getModule()->getContext();
767 TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
768 switch (Kind) {
769 case TargetLoweringBase::AtomicExpansionKind::None:
770 return false;
771 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
772 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
773 unsigned ValueSize = getAtomicOpSize(AI);
774 if (ValueSize < MinCASSize) {
775 expandPartwordAtomicRMW(AI,
776 TargetLoweringBase::AtomicExpansionKind::LLSC);
777 } else {
778 auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
779 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
780 AI->getValOperand());
781 };
782 expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
783 AI->getAlign(), AI->getOrdering(), PerformOp);
784 }
785 return true;
786 }
787 case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
788 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
789 unsigned ValueSize = getAtomicOpSize(AI);
790 if (ValueSize < MinCASSize) {
791 expandPartwordAtomicRMW(AI,
792 TargetLoweringBase::AtomicExpansionKind::CmpXChg);
793 } else {
795 Ctx.getSyncScopeNames(SSNs);
796 auto MemScope = SSNs[AI->getSyncScopeID()].empty()
797 ? "system"
798 : SSNs[AI->getSyncScopeID()];
799 OptimizationRemarkEmitter ORE(AI->getFunction());
800 ORE.emit([&]() {
801 return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
802 << "A compare and swap loop was generated for an atomic "
803 << AI->getOperationName(AI->getOperation()) << " operation at "
804 << MemScope << " memory scope";
805 });
806 expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
807 }
808 return true;
809 }
810 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
811 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
812 unsigned ValueSize = getAtomicOpSize(AI);
813 if (ValueSize < MinCASSize) {
815 // Widen And/Or/Xor and give the target another chance at expanding it.
818 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
819 return true;
820 }
821 }
822 expandAtomicRMWToMaskedIntrinsic(AI);
823 return true;
824 }
825 case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
827 return true;
828 }
829 case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: {
831 return true;
832 }
833 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
834 return lowerAtomicRMWInst(AI);
835 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
836 TLI->emitExpandAtomicRMW(AI);
837 return true;
838 default:
839 llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
840 }
841}
842
843namespace {
844
845struct PartwordMaskValues {
846 // These three fields are guaranteed to be set by createMaskInstrs.
847 Type *WordType = nullptr;
848 Type *ValueType = nullptr;
849 Type *IntValueType = nullptr;
850 Value *AlignedAddr = nullptr;
851 Align AlignedAddrAlignment;
852 // The remaining fields can be null.
853 Value *ShiftAmt = nullptr;
854 Value *Mask = nullptr;
855 Value *Inv_Mask = nullptr;
856};
857
858[[maybe_unused]]
859raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
860 auto PrintObj = [&O](auto *V) {
861 if (V)
862 O << *V;
863 else
864 O << "nullptr";
865 O << '\n';
866 };
867 O << "PartwordMaskValues {\n";
868 O << " WordType: ";
869 PrintObj(PMV.WordType);
870 O << " ValueType: ";
871 PrintObj(PMV.ValueType);
872 O << " AlignedAddr: ";
873 PrintObj(PMV.AlignedAddr);
874 O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
875 O << " ShiftAmt: ";
876 PrintObj(PMV.ShiftAmt);
877 O << " Mask: ";
878 PrintObj(PMV.Mask);
879 O << " Inv_Mask: ";
880 PrintObj(PMV.Inv_Mask);
881 O << "}\n";
882 return O;
883}
884
885} // end anonymous namespace
886
887/// This is a helper function which builds instructions to provide
888/// values necessary for partword atomic operations. It takes an
889/// incoming address, Addr, and ValueType, and constructs the address,
890/// shift-amounts and masks needed to work with a larger value of size
891/// WordSize.
892///
893/// AlignedAddr: Addr rounded down to a multiple of WordSize
894///
895/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
896/// from AlignAddr for it to have the same value as if
897/// ValueType was loaded from Addr.
898///
899/// Mask: Value to mask with the value loaded from AlignAddr to
900/// include only the part that would've been loaded from Addr.
901///
902/// Inv_Mask: The inverse of Mask.
903static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
905 Value *Addr, Align AddrAlign,
906 unsigned MinWordSize) {
907 PartwordMaskValues PMV;
908
909 Module *M = I->getModule();
910 LLVMContext &Ctx = M->getContext();
911 const DataLayout &DL = M->getDataLayout();
912 unsigned ValueSize = DL.getTypeStoreSize(ValueType);
913
914 PMV.ValueType = PMV.IntValueType = ValueType;
915 if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy())
916 PMV.IntValueType =
917 Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());
918
919 PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
920 : ValueType;
921 if (PMV.ValueType == PMV.WordType) {
922 PMV.AlignedAddr = Addr;
923 PMV.AlignedAddrAlignment = AddrAlign;
924 PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
925 PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
926 return PMV;
927 }
928
929 PMV.AlignedAddrAlignment = Align(MinWordSize);
930
931 assert(ValueSize < MinWordSize);
932
933 PointerType *PtrTy = cast<PointerType>(Addr->getType());
934 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
935 Value *PtrLSB;
936
937 if (AddrAlign < MinWordSize) {
938 PMV.AlignedAddr = Builder.CreateIntrinsic(
939 Intrinsic::ptrmask, {PtrTy, IntTy},
940 {Addr, ConstantInt::getSigned(IntTy, ~(uint64_t)(MinWordSize - 1))},
941 nullptr, "AlignedAddr");
942
943 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
944 PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
945 } else {
946 // If the alignment is high enough, the LSB are known 0.
947 PMV.AlignedAddr = Addr;
948 PtrLSB = ConstantInt::getNullValue(IntTy);
949 }
950
951 if (DL.isLittleEndian()) {
952 // turn bytes into bits
953 PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
954 } else {
955 // turn bytes into bits, and count from the other side.
956 PMV.ShiftAmt = Builder.CreateShl(
957 Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
958 }
959
960 PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
961 PMV.Mask = Builder.CreateShl(
962 ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
963 "Mask");
964
965 PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
966
967 return PMV;
968}
969
970static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
971 const PartwordMaskValues &PMV) {
972 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
973 if (PMV.WordType == PMV.ValueType)
974 return WideWord;
975
976 Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
977 Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
978 return Builder.CreateBitCast(Trunc, PMV.ValueType);
979}
980
981static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
982 Value *Updated, const PartwordMaskValues &PMV) {
983 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
984 assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
985 if (PMV.WordType == PMV.ValueType)
986 return Updated;
987
988 Updated = Builder.CreateBitCast(Updated, PMV.IntValueType);
989
990 Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
991 Value *Shift =
992 Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
993 Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
994 Value *Or = Builder.CreateOr(And, Shift, "inserted");
995 return Or;
996}
997
998/// Emit IR to implement a masked version of a given atomicrmw
999/// operation. (That is, only the bits under the Mask should be
1000/// affected by the operation)
1002 IRBuilderBase &Builder, Value *Loaded,
1003 Value *Shifted_Inc, Value *Inc,
1004 const PartwordMaskValues &PMV) {
1005 // TODO: update to use
1006 // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
1007 // to merge bits from two values without requiring PMV.Inv_Mask.
1008 switch (Op) {
1009 case AtomicRMWInst::Xchg: {
1010 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
1011 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
1012 return FinalVal;
1013 }
1014 case AtomicRMWInst::Or:
1015 case AtomicRMWInst::Xor:
1016 case AtomicRMWInst::And:
1017 llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
1018 case AtomicRMWInst::Add:
1019 case AtomicRMWInst::Sub:
1020 case AtomicRMWInst::Nand: {
1021 // The other arithmetic ops need to be masked into place.
1022 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
1023 Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
1024 Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
1025 Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
1026 return FinalVal;
1027 }
1028 case AtomicRMWInst::Max:
1029 case AtomicRMWInst::Min:
1044 // Finally, other ops will operate on the full value, so truncate down to
1045 // the original size, and expand out again after doing the
1046 // operation. Bitcasts will be inserted for FP values.
1047 Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
1048 Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
1049 Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
1050 return FinalVal;
1051 }
1052 default:
1053 llvm_unreachable("Unknown atomic op");
1054 }
1055}
1056
1057/// Expand a sub-word atomicrmw operation into an appropriate
1058/// word-sized operation.
1059///
1060/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
1061/// way as a typical atomicrmw expansion. The only difference here is
1062/// that the operation inside of the loop may operate upon only a
1063/// part of the value.
1064void AtomicExpandImpl::expandPartwordAtomicRMW(
1065 AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
1066 // Widen And/Or/Xor and give the target another chance at expanding it.
1070 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
1071 return;
1072 }
1073 AtomicOrdering MemOpOrder = AI->getOrdering();
1074 SyncScope::ID SSID = AI->getSyncScopeID();
1075
1076 ReplacementIRBuilder Builder(AI, *DL);
1077
1078 PartwordMaskValues PMV =
1079 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1080 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1081
1082 Value *ValOperand_Shifted = nullptr;
1085 Value *ValOp = Builder.CreateBitCast(AI->getValOperand(), PMV.IntValueType);
1086 ValOperand_Shifted =
1087 Builder.CreateShl(Builder.CreateZExt(ValOp, PMV.WordType), PMV.ShiftAmt,
1088 "ValOperand_Shifted");
1089 }
1090
1091 auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
1092 return performMaskedAtomicOp(Op, Builder, Loaded, ValOperand_Shifted,
1093 AI->getValOperand(), PMV);
1094 };
1095
1096 Value *OldResult;
1097 if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
1098 OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
1099 PMV.AlignedAddrAlignment, MemOpOrder, SSID,
1100 AI->isVolatile(), PerformPartwordOp,
1102 } else {
1103 assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
1104 OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
1105 PMV.AlignedAddrAlignment, MemOpOrder,
1106 PerformPartwordOp);
1107 }
1108
1109 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1110 AI->replaceAllUsesWith(FinalOldResult);
1111 AI->eraseFromParent();
1112}
1113
1114// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
1115AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
1116 ReplacementIRBuilder Builder(AI, *DL);
1118
1120 Op == AtomicRMWInst::And) &&
1121 "Unable to widen operation");
1122
1123 PartwordMaskValues PMV =
1124 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1125 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1126
1127 Value *ValOperand_Shifted =
1128 Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
1129 PMV.ShiftAmt, "ValOperand_Shifted");
1130
1131 Value *NewOperand;
1132
1133 if (Op == AtomicRMWInst::And)
1134 NewOperand =
1135 Builder.CreateOr(ValOperand_Shifted, PMV.Inv_Mask, "AndOperand");
1136 else
1137 NewOperand = ValOperand_Shifted;
1138
1139 AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
1140 Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
1141 AI->getOrdering(), AI->getSyncScopeID());
1142
1143 copyMetadataForAtomic(*NewAI, *AI);
1144
1145 Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
1146 AI->replaceAllUsesWith(FinalOldResult);
1147 AI->eraseFromParent();
1148 return NewAI;
1149}
1150
1151bool AtomicExpandImpl::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
1152 // The basic idea here is that we're expanding a cmpxchg of a
1153 // smaller memory size up to a word-sized cmpxchg. To do this, we
1154 // need to add a retry-loop for strong cmpxchg, so that
1155 // modifications to other parts of the word don't cause a spurious
1156 // failure.
1157
1158 // This generates code like the following:
1159 // [[Setup mask values PMV.*]]
1160 // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
1161 // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
1162 // %InitLoaded = load i32* %addr
1163 // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
1164 // br partword.cmpxchg.loop
1165 // partword.cmpxchg.loop:
1166 // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
1167 // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
1168 // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
1169 // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
1170 // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
1171 // i32 %FullWord_NewVal success_ordering failure_ordering
1172 // %OldVal = extractvalue { i32, i1 } %NewCI, 0
1173 // %Success = extractvalue { i32, i1 } %NewCI, 1
1174 // br i1 %Success, label %partword.cmpxchg.end,
1175 // label %partword.cmpxchg.failure
1176 // partword.cmpxchg.failure:
1177 // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
1178 // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
1179 // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
1180 // label %partword.cmpxchg.end
1181 // partword.cmpxchg.end:
1182 // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
1183 // %FinalOldVal = trunc i32 %tmp1 to i8
1184 // %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
1185 // %Res = insertvalue { i8, i1 } %25, i1 %Success, 1
1186
1187 Value *Addr = CI->getPointerOperand();
1188 Value *Cmp = CI->getCompareOperand();
1189 Value *NewVal = CI->getNewValOperand();
1190
1191 BasicBlock *BB = CI->getParent();
1192 Function *F = BB->getParent();
1193 ReplacementIRBuilder Builder(CI, *DL);
1194 LLVMContext &Ctx = Builder.getContext();
1195
1196 BasicBlock *EndBB =
1197 BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
1198 auto FailureBB =
1199 BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
1200 auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
1201
1202 // The split call above "helpfully" added a branch at the end of BB
1203 // (to the wrong place).
1204 std::prev(BB->end())->eraseFromParent();
1205 Builder.SetInsertPoint(BB);
1206
1207 PartwordMaskValues PMV =
1208 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1209 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1210
1211 // Shift the incoming values over, into the right location in the word.
1212 Value *NewVal_Shifted =
1213 Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
1214 Value *Cmp_Shifted =
1215 Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
1216
1217 // Load the entire current word, and mask into place the expected and new
1218 // values
1219 LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
1220 Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
1221 Builder.CreateBr(LoopBB);
1222
1223 // partword.cmpxchg.loop:
1224 Builder.SetInsertPoint(LoopBB);
1225 PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
1226 Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
1227
1228 // The initial load must be atomic with the same synchronization scope
1229 // to avoid a data race with concurrent stores. If the instruction being
1230 // emulated is volatile, issue a volatile load.
1231 // addIncoming is done first so that any replaceAllUsesWith calls during
1232 // normalization correctly update the PHI incoming value.
1233 InitLoaded->setVolatile(CI->isVolatile());
1235 InitLoaded->setAtomic(AtomicOrdering::Monotonic, CI->getSyncScopeID());
1236 // The newly created load might need to be lowered further. Because it is
1237 // created in the same block as the atomicrmw, the AtomicExpand loop will
1238 // not process it again.
1239 processAtomicInstr(InitLoaded);
1240 }
1241
1242 // Mask/Or the expected and new values into place in the loaded word.
1243 Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
1244 Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
1245 AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
1246 PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
1248 NewCI->setVolatile(CI->isVolatile());
1249 // When we're building a strong cmpxchg, we need a loop, so you
1250 // might think we could use a weak cmpxchg inside. But, using strong
1251 // allows the below comparison for ShouldContinue, and we're
1252 // expecting the underlying cmpxchg to be a machine instruction,
1253 // which is strong anyways.
1254 NewCI->setWeak(CI->isWeak());
1255
1256 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1257 Value *Success = Builder.CreateExtractValue(NewCI, 1);
1258
1259 if (CI->isWeak())
1260 Builder.CreateBr(EndBB);
1261 else
1262 Builder.CreateCondBr(Success, EndBB, FailureBB);
1263
1264 // partword.cmpxchg.failure:
1265 Builder.SetInsertPoint(FailureBB);
1266 // Upon failure, verify that the masked-out part of the loaded value
1267 // has been modified. If it didn't, abort the cmpxchg, since the
1268 // masked-in part must've.
1269 Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
1270 Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
1271 Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
1272
1273 // Add the second value to the phi from above
1274 Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
1275
1276 // partword.cmpxchg.end:
1277 Builder.SetInsertPoint(CI);
1278
1279 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1280 Value *Res = PoisonValue::get(CI->getType());
1281 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1282 Res = Builder.CreateInsertValue(Res, Success, 1);
1283
1284 CI->replaceAllUsesWith(Res);
1285 CI->eraseFromParent();
1286 return true;
1287}
1288
1289void AtomicExpandImpl::expandAtomicOpToLLSC(
1290 Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
1291 AtomicOrdering MemOpOrder,
1292 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1293 ReplacementIRBuilder Builder(I, *DL);
1294 Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
1295 MemOpOrder, PerformOp);
1296
1297 I->replaceAllUsesWith(Loaded);
1298 I->eraseFromParent();
1299}
1300
1301void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
1302 ReplacementIRBuilder Builder(AI, *DL);
1303
1304 PartwordMaskValues PMV =
1305 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1306 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1307
1308 // The value operand must be sign-extended for signed min/max so that the
1309 // target's signed comparison instructions can be used. Otherwise, just
1310 // zero-ext.
1311 Instruction::CastOps CastOp = Instruction::ZExt;
1312 AtomicRMWInst::BinOp RMWOp = AI->getOperation();
1313 if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
1314 CastOp = Instruction::SExt;
1315
1316 Value *ValOperand_Shifted = Builder.CreateShl(
1317 Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
1318 PMV.ShiftAmt, "ValOperand_Shifted");
1319 Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
1320 Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
1321 AI->getOrdering());
1322 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1323 AI->replaceAllUsesWith(FinalOldResult);
1324 AI->eraseFromParent();
1325}
1326
1327void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic(
1328 AtomicCmpXchgInst *CI) {
1329 ReplacementIRBuilder Builder(CI, *DL);
1330
1331 PartwordMaskValues PMV = createMaskInstrs(
1332 Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
1333 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1334
1335 Value *CmpVal_Shifted = Builder.CreateShl(
1336 Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
1337 "CmpVal_Shifted");
1338 Value *NewVal_Shifted = Builder.CreateShl(
1339 Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
1340 "NewVal_Shifted");
1342 Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
1343 CI->getMergedOrdering());
1344 Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
1345 Value *Res = PoisonValue::get(CI->getType());
1346 Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
1347 Value *Success = Builder.CreateICmpEQ(
1348 CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
1349 Res = Builder.CreateInsertValue(Res, Success, 1);
1350
1351 CI->replaceAllUsesWith(Res);
1352 CI->eraseFromParent();
1353}
1354
1355Value *AtomicExpandImpl::insertRMWLLSCLoop(
1356 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1357 AtomicOrdering MemOpOrder,
1358 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1359 LLVMContext &Ctx = Builder.getContext();
1360 BasicBlock *BB = Builder.GetInsertBlock();
1361 Function *F = BB->getParent();
1362
1363 assert(AddrAlign >= F->getDataLayout().getTypeStoreSize(ResultTy) &&
1364 "Expected at least natural alignment at this point.");
1365
1366 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1367 //
1368 // The standard expansion we produce is:
1369 // [...]
1370 // atomicrmw.start:
1371 // %loaded = @load.linked(%addr)
1372 // %new = some_op iN %loaded, %incr
1373 // %stored = @store_conditional(%new, %addr)
1374 // %try_again = icmp i32 ne %stored, 0
1375 // br i1 %try_again, label %loop, label %atomicrmw.end
1376 // atomicrmw.end:
1377 // [...]
1378 BasicBlock *ExitBB =
1379 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1380 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1381
1382 // The split call above "helpfully" added a branch at the end of BB (to the
1383 // wrong place).
1384 std::prev(BB->end())->eraseFromParent();
1385 Builder.SetInsertPoint(BB);
1386 Builder.CreateBr(LoopBB);
1387
1388 // Start the main loop block now that we've taken care of the preliminaries.
1389 Builder.SetInsertPoint(LoopBB);
1390 Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);
1391
1392 Value *NewVal = PerformOp(Builder, Loaded);
1393
1394 Value *StoreSuccess =
1395 TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
1396 Value *TryAgain = Builder.CreateICmpNE(
1397 StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
1398
1399 Instruction *CondBr = Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
1400
1401 // Atomic RMW expands to a Load-linked / Store-Conditional loop, because it is
1402 // hard to predict precise branch weigths we mark the branch as "unknown"
1403 // (50/50) to prevent misleading optimizations.
1405
1406 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1407 return Loaded;
1408}
1409
1410/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
1411/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
1412/// IR. As a migration step, we convert back to what use to be the standard
1413/// way to represent a pointer cmpxchg so that we can update backends one by
1414/// one.
1415AtomicCmpXchgInst *
1416AtomicExpandImpl::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1417 auto *M = CI->getModule();
1418 Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
1419 M->getDataLayout());
1420
1421 ReplacementIRBuilder Builder(CI, *DL);
1422
1423 Value *Addr = CI->getPointerOperand();
1424
1425 Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
1426 Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
1427
1428 auto *NewCI = Builder.CreateAtomicCmpXchg(
1429 Addr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
1430 CI->getFailureOrdering(), CI->getSyncScopeID());
1431 NewCI->setVolatile(CI->isVolatile());
1432 NewCI->setWeak(CI->isWeak());
1433 LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1434
1435 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1436 Value *Succ = Builder.CreateExtractValue(NewCI, 1);
1437
1438 OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
1439
1440 Value *Res = PoisonValue::get(CI->getType());
1441 Res = Builder.CreateInsertValue(Res, OldVal, 0);
1442 Res = Builder.CreateInsertValue(Res, Succ, 1);
1443
1444 CI->replaceAllUsesWith(Res);
1445 CI->eraseFromParent();
1446 return NewCI;
1447}
1448
1449bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1450 AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
1451 AtomicOrdering FailureOrder = CI->getFailureOrdering();
1452 Value *Addr = CI->getPointerOperand();
1453 BasicBlock *BB = CI->getParent();
1454 Function *F = BB->getParent();
1455 LLVMContext &Ctx = F->getContext();
1456 // If shouldInsertFencesForAtomic() returns true, then the target does not
1457 // want to deal with memory orders, and emitLeading/TrailingFence should take
1458 // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
1459 // should preserve the ordering.
1460 bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
1461 AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
1462 ? AtomicOrdering::Monotonic
1463 : CI->getMergedOrdering();
1464
1465 // In implementations which use a barrier to achieve release semantics, we can
1466 // delay emitting this barrier until we know a store is actually going to be
1467 // attempted. The cost of this delay is that we need 2 copies of the block
1468 // emitting the load-linked, affecting code size.
1469 //
1470 // Ideally, this logic would be unconditional except for the minsize check
1471 // since in other cases the extra blocks naturally collapse down to the
1472 // minimal loop. Unfortunately, this puts too much stress on later
1473 // optimisations so we avoid emitting the extra logic in those cases too.
1474 bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
1475 SuccessOrder != AtomicOrdering::Monotonic &&
1476 SuccessOrder != AtomicOrdering::Acquire &&
1477 !F->hasMinSize();
1478
1479 // There's no overhead for sinking the release barrier in a weak cmpxchg, so
1480 // do it even on minsize.
1481 bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();
1482
1483 // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
1484 //
1485 // The full expansion we produce is:
1486 // [...]
1487 // %aligned.addr = ...
1488 // cmpxchg.start:
1489 // %unreleasedload = @load.linked(%aligned.addr)
1490 // %unreleasedload.extract = extract value from %unreleasedload
1491 // %should_store = icmp eq %unreleasedload.extract, %desired
1492 // br i1 %should_store, label %cmpxchg.releasingstore,
1493 // label %cmpxchg.nostore
1494 // cmpxchg.releasingstore:
1495 // fence?
1496 // br label cmpxchg.trystore
1497 // cmpxchg.trystore:
1498 // %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
1499 // [%releasedload, %cmpxchg.releasedload]
1500 // %updated.new = insert %new into %loaded.trystore
1501 // %stored = @store_conditional(%updated.new, %aligned.addr)
1502 // %success = icmp eq i32 %stored, 0
1503 // br i1 %success, label %cmpxchg.success,
1504 // label %cmpxchg.releasedload/%cmpxchg.failure
1505 // cmpxchg.releasedload:
1506 // %releasedload = @load.linked(%aligned.addr)
1507 // %releasedload.extract = extract value from %releasedload
1508 // %should_store = icmp eq %releasedload.extract, %desired
1509 // br i1 %should_store, label %cmpxchg.trystore,
1510 // label %cmpxchg.failure
1511 // cmpxchg.success:
1512 // fence?
1513 // br label %cmpxchg.end
1514 // cmpxchg.nostore:
1515 // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
1516 // [%releasedload,
1517 // %cmpxchg.releasedload/%cmpxchg.trystore]
1518 // @load_linked_fail_balance()?
1519 // br label %cmpxchg.failure
1520 // cmpxchg.failure:
1521 // fence?
1522 // br label %cmpxchg.end
1523 // cmpxchg.end:
1524 // %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
1525 // [%loaded.trystore, %cmpxchg.trystore]
1526 // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
1527 // %loaded = extract value from %loaded.exit
1528 // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
1529 // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
1530 // [...]
1531 BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
1532 auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
1533 auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
1534 auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
1535 auto ReleasedLoadBB =
1536 BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
1537 auto TryStoreBB =
1538 BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
1539 auto ReleasingStoreBB =
1540 BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
1541 auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
1542
1543 ReplacementIRBuilder Builder(CI, *DL);
1544
1545 // The split call above "helpfully" added a branch at the end of BB (to the
1546 // wrong place), but we might want a fence too. It's easiest to just remove
1547 // the branch entirely.
1548 std::prev(BB->end())->eraseFromParent();
1549 Builder.SetInsertPoint(BB);
1550 if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1551 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1552
1553 PartwordMaskValues PMV =
1554 createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
1555 CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1556 Builder.CreateBr(StartBB);
1557
1558 // Start the main loop block now that we've taken care of the preliminaries.
1559 Builder.SetInsertPoint(StartBB);
1560 Value *UnreleasedLoad =
1561 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1562 Value *UnreleasedLoadExtract =
1563 extractMaskedValue(Builder, UnreleasedLoad, PMV);
1564 Value *ShouldStore = Builder.CreateICmpEQ(
1565 UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");
1566
1567 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1568 // jump straight past that fence instruction (if it exists).
1569 Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB,
1570 MDBuilder(F->getContext()).createLikelyBranchWeights());
1571
1572 Builder.SetInsertPoint(ReleasingStoreBB);
1573 if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1574 TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1575 Builder.CreateBr(TryStoreBB);
1576
1577 Builder.SetInsertPoint(TryStoreBB);
1578 PHINode *LoadedTryStore =
1579 Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
1580 LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
1581 Value *NewValueInsert =
1582 insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
1583 Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
1584 PMV.AlignedAddr, MemOpOrder);
1585 StoreSuccess = Builder.CreateICmpEQ(
1586 StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
1587 BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1588 Builder.CreateCondBr(StoreSuccess, SuccessBB,
1589 CI->isWeak() ? FailureBB : RetryBB,
1590 MDBuilder(F->getContext()).createLikelyBranchWeights());
1591
1592 Builder.SetInsertPoint(ReleasedLoadBB);
1593 Value *SecondLoad;
1594 if (HasReleasedLoadBB) {
1595 SecondLoad =
1596 TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
1597 Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
1598 ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
1599 CI->getCompareOperand(), "should_store");
1600
1601 // If the cmpxchg doesn't actually need any ordering when it fails, we can
1602 // jump straight past that fence instruction (if it exists).
1603 Builder.CreateCondBr(
1604 ShouldStore, TryStoreBB, NoStoreBB,
1605 MDBuilder(F->getContext()).createLikelyBranchWeights());
1606 // Update PHI node in TryStoreBB.
1607 LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
1608 } else
1609 Builder.CreateUnreachable();
1610
1611 // Make sure later instructions don't get reordered with a fence if
1612 // necessary.
1613 Builder.SetInsertPoint(SuccessBB);
1614 if (ShouldInsertFencesForAtomic ||
1616 TLI->emitTrailingFence(Builder, CI, SuccessOrder);
1617 Builder.CreateBr(ExitBB);
1618
1619 Builder.SetInsertPoint(NoStoreBB);
1620 PHINode *LoadedNoStore =
1621 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
1622 LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
1623 if (HasReleasedLoadBB)
1624 LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);
1625
1626 // In the failing case, where we don't execute the store-conditional, the
1627 // target might want to balance out the load-linked with a dedicated
1628 // instruction (e.g., on ARM, clearing the exclusive monitor).
1630 Builder.CreateBr(FailureBB);
1631
1632 Builder.SetInsertPoint(FailureBB);
1633 PHINode *LoadedFailure =
1634 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
1635 LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
1636 if (CI->isWeak())
1637 LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
1638 if (ShouldInsertFencesForAtomic)
1639 TLI->emitTrailingFence(Builder, CI, FailureOrder);
1640 Builder.CreateBr(ExitBB);
1641
1642 // Finally, we have control-flow based knowledge of whether the cmpxchg
1643 // succeeded or not. We expose this to later passes by converting any
1644 // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
1645 // PHI.
1646 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1647 PHINode *LoadedExit =
1648 Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
1649 LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
1650 LoadedExit->addIncoming(LoadedFailure, FailureBB);
1651 PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
1652 Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
1653 Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
1654
1655 // This is the "exit value" from the cmpxchg expansion. It may be of
1656 // a type wider than the one in the cmpxchg instruction.
1657 Value *LoadedFull = LoadedExit;
1658
1659 Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
1660 Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);
1661
1662 // Look for any users of the cmpxchg that are just comparing the loaded value
1663 // against the desired one, and replace them with the CFG-derived version.
1665 for (auto *User : CI->users()) {
1666 ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
1667 if (!EV)
1668 continue;
1669
1670 assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
1671 "weird extraction from { iN, i1 }");
1672
1673 if (EV->getIndices()[0] == 0)
1674 EV->replaceAllUsesWith(Loaded);
1675 else
1677
1678 PrunedInsts.push_back(EV);
1679 }
1680
1681 // We can remove the instructions now we're no longer iterating through them.
1682 for (auto *EV : PrunedInsts)
1683 EV->eraseFromParent();
1684
1685 if (!CI->use_empty()) {
1686 // Some use of the full struct return that we don't understand has happened,
1687 // so we've got to reconstruct it properly.
1688 Value *Res;
1689 Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0);
1690 Res = Builder.CreateInsertValue(Res, Success, 1);
1691
1692 CI->replaceAllUsesWith(Res);
1693 }
1694
1695 CI->eraseFromParent();
1696 return true;
1697}
1698
1699bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
1700 // TODO: Add floating point support.
1701 auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
1702 if (!C)
1703 return false;
1704
1705 switch (RMWI->getOperation()) {
1706 case AtomicRMWInst::Add:
1707 case AtomicRMWInst::Sub:
1708 case AtomicRMWInst::Or:
1709 case AtomicRMWInst::Xor:
1710 return C->isZero();
1711 case AtomicRMWInst::And:
1712 return C->isMinusOne();
1713 case AtomicRMWInst::Min:
1714 return C->isMaxValue(true);
1715 case AtomicRMWInst::Max:
1716 return C->isMinValue(true);
1718 return C->isMaxValue(false);
1720 return C->isMinValue(false);
1721 default:
1722 return false;
1723 }
1724}
1725
1726bool AtomicExpandImpl::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
1727 if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1728 tryExpandAtomicLoad(ResultingLoad);
1729 return true;
1730 }
1731 return false;
1732}
1733
1734Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
1735 IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
1736 AtomicOrdering MemOpOrder, SyncScope::ID SSID, bool IsVolatile,
1737 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
1738 CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc) {
1739 LLVMContext &Ctx = Builder.getContext();
1740 BasicBlock *BB = Builder.GetInsertBlock();
1741 Function *F = BB->getParent();
1742
1743 // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1744 //
1745 // The standard expansion we produce is:
1746 // [...]
1747 // %init_loaded = load atomic iN* %addr
1748 // br label %loop
1749 // loop:
1750 // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1751 // %new = some_op iN %loaded, %incr
1752 // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1753 // %new_loaded = extractvalue { iN, i1 } %pair, 0
1754 // %success = extractvalue { iN, i1 } %pair, 1
1755 // br i1 %success, label %atomicrmw.end, label %loop
1756 // atomicrmw.end:
1757 // [...]
1758 BasicBlock *ExitBB =
1759 BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1760 BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1761
1762 // The split call above "helpfully" added a branch at the end of BB (to the
1763 // wrong place), but we want a load. It's easiest to just remove
1764 // the branch entirely.
1765 std::prev(BB->end())->eraseFromParent();
1766 Builder.SetInsertPoint(BB);
1767 LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
1768 Builder.CreateBr(LoopBB);
1769
1770 // Start the main loop block now that we've taken care of the preliminaries.
1771 Builder.SetInsertPoint(LoopBB);
1772 PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
1773 Loaded->addIncoming(InitLoaded, BB);
1774
1775 // The initial load must be atomic with the same synchronization scope
1776 // to avoid a data race with concurrent stores. If the instruction being
1777 // emulated is volatile, issue a volatile load.
1778 // addIncoming is done first so that any replaceAllUsesWith calls during
1779 // normalization correctly update the PHI incoming value.
1780 InitLoaded->setVolatile(IsVolatile);
1782 InitLoaded->setAtomic(AtomicOrdering::Monotonic, SSID);
1783 // The newly created load might need to be lowered further. Because it is
1784 // created in the same block as the atomicrmw, the AtomicExpand loop will
1785 // not process it again.
1786 processAtomicInstr(InitLoaded);
1787 }
1788
1789 Value *NewVal = PerformOp(Builder, Loaded);
1790
1791 Value *NewLoaded = nullptr;
1792 Value *Success = nullptr;
1793
1794 CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
1795 MemOpOrder == AtomicOrdering::Unordered
1796 ? AtomicOrdering::Monotonic
1797 : MemOpOrder,
1798 SSID, IsVolatile, Success, NewLoaded, MetadataSrc);
1799 assert(Success && NewLoaded);
1800
1801 Loaded->addIncoming(NewLoaded, LoopBB);
1802
1803 Instruction *CondBr = Builder.CreateCondBr(Success, ExitBB, LoopBB);
1804
1805 // Atomic RMW expands to a cmpxchg loop, Since precise branch weights
1806 // cannot be easily determined here, we mark the branch as "unknown" (50/50)
1807 // to prevent misleading optimizations.
1809
1810 Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1811 return NewLoaded;
1812}
1813
1814bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1815 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1816 unsigned ValueSize = getAtomicOpSize(CI);
1817
1818 switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1819 default:
1820 llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
1821 case TargetLoweringBase::AtomicExpansionKind::None:
1822 if (ValueSize < MinCASSize)
1823 return expandPartwordCmpXchg(CI);
1824 return false;
1825 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
1826 return expandAtomicCmpXchg(CI);
1827 }
1828 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
1829 expandAtomicCmpXchgToMaskedIntrinsic(CI);
1830 return true;
1831 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
1832 return lowerAtomicCmpXchgInst(CI);
1833 case TargetLoweringBase::AtomicExpansionKind::CustomExpand: {
1834 TLI->emitExpandAtomicCmpXchg(CI);
1835 return true;
1836 }
1837 }
1838}
1839
1840bool AtomicExpandImpl::expandAtomicRMWToCmpXchg(
1841 AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg) {
1842 ReplacementIRBuilder Builder(AI, AI->getDataLayout());
1843 Builder.setIsFPConstrained(
1844 AI->getFunction()->hasFnAttribute(Attribute::StrictFP));
1845
1846 // FIXME: If FP exceptions are observable, we should force them off for the
1847 // loop for the FP atomics.
1848 Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
1849 Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
1850 AI->getOrdering(), AI->getSyncScopeID(), AI->isVolatile(),
1851 [&](IRBuilderBase &Builder, Value *Loaded) {
1852 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
1853 AI->getValOperand());
1854 },
1855 CreateCmpXchg, /*MetadataSrc=*/AI);
1856
1857 AI->replaceAllUsesWith(Loaded);
1858 AI->eraseFromParent();
1859 return true;
1860}
1861
1862// In order to use one of the sized library calls such as
1863// __atomic_fetch_add_4, the alignment must be sufficient, the size
1864// must be one of the potentially-specialized sizes, and the value
1865// type must actually exist in C on the target (otherwise, the
1866// function wouldn't actually be defined.)
1867static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
1868 const DataLayout &DL) {
1869 // TODO: "LargestSize" is an approximation for "largest type that
1870 // you can express in C". It seems to be the case that int128 is
1871 // supported on all 64-bit platforms, otherwise only up to 64-bit
1872 // integers are supported. If we get this wrong, then we'll try to
1873 // call a sized libcall that doesn't actually exist. There should
1874 // really be some more reliable way in LLVM of determining integer
1875 // sizes which are valid in the target's C ABI...
1876 unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1877 return Alignment >= Size &&
1878 (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1879 Size <= LargestSize;
1880}
1881
1882void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) {
1883 static const RTLIB::Libcall Libcalls[6] = {
1884 RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1885 RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1886 unsigned Size = getAtomicOpSize(I);
1887
1888 bool Expanded = expandAtomicOpToLibcall(
1889 I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
1890 I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1891 if (!Expanded)
1892 handleUnsupportedAtomicSize(I, "atomic load");
1893}
1894
1895void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
1896 static const RTLIB::Libcall Libcalls[6] = {
1897 RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1898 RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1899 unsigned Size = getAtomicOpSize(I);
1900
1901 bool Expanded = expandAtomicOpToLibcall(
1902 I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
1903 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1904 if (!Expanded)
1905 handleUnsupportedAtomicSize(I, "atomic store");
1906}
1907
1908void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I,
1909 const Twine &AtomicOpName,
1910 Instruction *DiagnosticInst) {
1911 static const RTLIB::Libcall Libcalls[6] = {
1912 RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1913 RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1914 RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1915 unsigned Size = getAtomicOpSize(I);
1916
1917 bool Expanded = expandAtomicOpToLibcall(
1918 I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
1919 I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
1920 Libcalls);
1921 if (!Expanded)
1922 handleUnsupportedAtomicSize(I, AtomicOpName, DiagnosticInst);
1923}
1924
1926 static const RTLIB::Libcall LibcallsXchg[6] = {
1927 RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1928 RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1929 RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1930 static const RTLIB::Libcall LibcallsAdd[6] = {
1931 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1932 RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1933 RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1934 static const RTLIB::Libcall LibcallsSub[6] = {
1935 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1936 RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1937 RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1938 static const RTLIB::Libcall LibcallsAnd[6] = {
1939 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1940 RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1941 RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1942 static const RTLIB::Libcall LibcallsOr[6] = {
1943 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1944 RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1945 RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1946 static const RTLIB::Libcall LibcallsXor[6] = {
1947 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1948 RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1949 RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1950 static const RTLIB::Libcall LibcallsNand[6] = {
1951 RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1952 RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1953 RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1954
1955 switch (Op) {
1957 llvm_unreachable("Should not have BAD_BINOP.");
1959 return ArrayRef(LibcallsXchg);
1960 case AtomicRMWInst::Add:
1961 return ArrayRef(LibcallsAdd);
1962 case AtomicRMWInst::Sub:
1963 return ArrayRef(LibcallsSub);
1964 case AtomicRMWInst::And:
1965 return ArrayRef(LibcallsAnd);
1966 case AtomicRMWInst::Or:
1967 return ArrayRef(LibcallsOr);
1968 case AtomicRMWInst::Xor:
1969 return ArrayRef(LibcallsXor);
1971 return ArrayRef(LibcallsNand);
1972 case AtomicRMWInst::Max:
1973 case AtomicRMWInst::Min:
1988 // No atomic libcalls are available for these.
1989 return {};
1990 }
1991 llvm_unreachable("Unexpected AtomicRMW operation.");
1992}
1993
1994void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
1995 ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
1996
1997 unsigned Size = getAtomicOpSize(I);
1998
1999 bool Success = false;
2000 if (!Libcalls.empty())
2001 Success = expandAtomicOpToLibcall(
2002 I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
2003 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
2004
2005 // The expansion failed: either there were no libcalls at all for
2006 // the operation (min/max), or there were only size-specialized
2007 // libcalls (add/sub/etc) and we needed a generic. So, expand to a
2008 // CAS libcall, via a CAS loop, instead.
2009 if (!Success) {
2010 expandAtomicRMWToCmpXchg(
2011 I, [this, I](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
2012 Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
2013 SyncScope::ID SSID, bool IsVolatile, Value *&Success,
2014 Value *&NewLoaded, Instruction *MetadataSrc) {
2015 // Create the CAS instruction normally...
2016 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
2017 Addr, Loaded, NewVal, Alignment, MemOpOrder,
2019 Pair->setVolatile(IsVolatile);
2020 if (MetadataSrc)
2021 copyMetadataForAtomic(*Pair, *MetadataSrc);
2022
2023 Success = Builder.CreateExtractValue(Pair, 1, "success");
2024 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
2025
2026 // ...and then expand the CAS into a libcall.
2027 expandAtomicCASToLibcall(
2028 Pair,
2029 "atomicrmw " + AtomicRMWInst::getOperationName(I->getOperation()),
2030 MetadataSrc);
2031 });
2032 }
2033}
2034
2035// A helper routine for the above expandAtomic*ToLibcall functions.
2036//
2037// 'Libcalls' contains an array of enum values for the particular
2038// ATOMIC libcalls to be emitted. All of the other arguments besides
2039// 'I' are extracted from the Instruction subclass by the
2040// caller. Depending on the particular call, some will be null.
2041bool AtomicExpandImpl::expandAtomicOpToLibcall(
2042 Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
2043 Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
2044 AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
2045 assert(Libcalls.size() == 6);
2046
2047 LLVMContext &Ctx = I->getContext();
2048 Module *M = I->getModule();
2049 const DataLayout &DL = M->getDataLayout();
2050 IRBuilder<> Builder(I);
2051 IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
2052
2053 bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
2054 Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
2055
2056 if (M->getTargetTriple().isOSWindows() && M->getTargetTriple().isX86_64() &&
2057 Size == 16) {
2058 // x86_64 Windows passes i128 as an XMM vector; on return, it is in
2059 // XMM0, and as a parameter, it is passed indirectly. The generic lowering
2060 // rules handles this correctly if we pass it as a v2i64 rather than
2061 // i128. This is what Clang does in the frontend for such types as well
2062 // (see WinX86_64ABIInfo::classify in Clang).
2063 SizedIntTy = FixedVectorType::get(Type::getInt64Ty(Ctx), 2);
2064 }
2065
2066 const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
2067
2068 // TODO: the "order" argument type is "int", not int32. So
2069 // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
2070 assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
2071 Constant *OrderingVal =
2072 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
2073 Constant *Ordering2Val = nullptr;
2074 if (CASExpected) {
2075 assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
2076 Ordering2Val =
2077 ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
2078 }
2079 bool HasResult = I->getType() != Type::getVoidTy(Ctx);
2080
2081 RTLIB::Libcall RTLibType;
2082 if (UseSizedLibcall) {
2083 switch (Size) {
2084 case 1:
2085 RTLibType = Libcalls[1];
2086 break;
2087 case 2:
2088 RTLibType = Libcalls[2];
2089 break;
2090 case 4:
2091 RTLibType = Libcalls[3];
2092 break;
2093 case 8:
2094 RTLibType = Libcalls[4];
2095 break;
2096 case 16:
2097 RTLibType = Libcalls[5];
2098 break;
2099 }
2100 } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
2101 RTLibType = Libcalls[0];
2102 } else {
2103 // Can't use sized function, and there's no generic for this
2104 // operation, so give up.
2105 return false;
2106 }
2107
2108 RTLIB::LibcallImpl LibcallImpl = LibcallLowering->getLibcallImpl(RTLibType);
2109 if (LibcallImpl == RTLIB::Unsupported) {
2110 // This target does not implement the requested atomic libcall so give up.
2111 return false;
2112 }
2113
2114 // Build up the function call. There's two kinds. First, the sized
2115 // variants. These calls are going to be one of the following (with
2116 // N=1,2,4,8,16):
2117 // iN __atomic_load_N(iN *ptr, int ordering)
2118 // void __atomic_store_N(iN *ptr, iN val, int ordering)
2119 // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
2120 // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
2121 // int success_order, int failure_order)
2122 //
2123 // Note that these functions can be used for non-integer atomic
2124 // operations, the values just need to be bitcast to integers on the
2125 // way in and out.
2126 //
2127 // And, then, the generic variants. They look like the following:
2128 // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
2129 // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
2130 // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
2131 // int ordering)
2132 // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
2133 // void *desired, int success_order,
2134 // int failure_order)
2135 //
2136 // The different signatures are built up depending on the
2137 // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
2138 // variables.
2139
2140 AllocaInst *AllocaCASExpected = nullptr;
2141 AllocaInst *AllocaValue = nullptr;
2142 AllocaInst *AllocaResult = nullptr;
2143
2144 Type *ResultTy;
2146 AttributeList Attr;
2147
2148 // 'size' argument.
2149 if (!UseSizedLibcall) {
2150 // Note, getIntPtrType is assumed equivalent to size_t.
2151 Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
2152 }
2153
2154 // 'ptr' argument.
2155 // note: This assumes all address spaces share a common libfunc
2156 // implementation and that addresses are convertable. For systems without
2157 // that property, we'd need to extend this mechanism to support AS-specific
2158 // families of atomic intrinsics.
2159 Value *PtrVal = PointerOperand;
2160 PtrVal = Builder.CreateAddrSpaceCast(PtrVal, PointerType::getUnqual(Ctx));
2161 Args.push_back(PtrVal);
2162
2163 // 'expected' argument, if present.
2164 if (CASExpected) {
2165 AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
2166 AllocaCASExpected->setAlignment(AllocaAlignment);
2167 Builder.CreateLifetimeStart(AllocaCASExpected);
2168 Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
2169 Args.push_back(AllocaCASExpected);
2170 }
2171
2172 // 'val' argument ('desired' for cas), if present.
2173 if (ValueOperand) {
2174 if (UseSizedLibcall) {
2175 Value *IntValue =
2176 Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
2177 Args.push_back(IntValue);
2178 } else {
2179 AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
2180 AllocaValue->setAlignment(AllocaAlignment);
2181 Builder.CreateLifetimeStart(AllocaValue);
2182 Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
2183 Args.push_back(AllocaValue);
2184 }
2185 }
2186
2187 // 'ret' argument.
2188 if (!CASExpected && HasResult && !UseSizedLibcall) {
2189 AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
2190 AllocaResult->setAlignment(AllocaAlignment);
2191 Builder.CreateLifetimeStart(AllocaResult);
2192 Args.push_back(AllocaResult);
2193 }
2194
2195 // 'ordering' ('success_order' for cas) argument.
2196 Args.push_back(OrderingVal);
2197
2198 // 'failure_order' argument, if present.
2199 if (Ordering2Val)
2200 Args.push_back(Ordering2Val);
2201
2202 // Now, the return type.
2203 if (CASExpected) {
2204 ResultTy = Type::getInt1Ty(Ctx);
2205 Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
2206 } else if (HasResult && UseSizedLibcall)
2207 ResultTy = SizedIntTy;
2208 else
2209 ResultTy = Type::getVoidTy(Ctx);
2210
2211 // Done with setting up arguments and return types, create the call:
2213 for (Value *Arg : Args)
2214 ArgTys.push_back(Arg->getType());
2215 FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
2216 FunctionCallee LibcallFn = M->getOrInsertFunction(
2218 Attr);
2219 CallInst *Call = Builder.CreateCall(LibcallFn, Args);
2220 Call->setAttributes(Attr);
2221 Value *Result = Call;
2222
2223 // And then, extract the results...
2224 if (ValueOperand && !UseSizedLibcall)
2225 Builder.CreateLifetimeEnd(AllocaValue);
2226
2227 if (CASExpected) {
2228 // The final result from the CAS is {load of 'expected' alloca, bool result
2229 // from call}
2230 Type *FinalResultTy = I->getType();
2231 Value *V = PoisonValue::get(FinalResultTy);
2232 Value *ExpectedOut = Builder.CreateAlignedLoad(
2233 CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
2234 Builder.CreateLifetimeEnd(AllocaCASExpected);
2235 V = Builder.CreateInsertValue(V, ExpectedOut, 0);
2236 V = Builder.CreateInsertValue(V, Result, 1);
2238 } else if (HasResult) {
2239 Value *V;
2240 if (UseSizedLibcall)
2241 V = Builder.CreateBitOrPointerCast(Result, I->getType());
2242 else {
2243 V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
2244 AllocaAlignment);
2245 Builder.CreateLifetimeEnd(AllocaResult);
2246 }
2248 }
2249 I->eraseFromParent();
2250 return true;
2251}
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static Value * performMaskedAtomicOp(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, Value *Loaded, Value *Shifted_Inc, Value *Inc, const PartwordMaskValues &PMV)
Emit IR to implement a masked version of a given atomicrmw operation.
static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder, Instruction *I, Type *ValueType, Value *Addr, Align AddrAlign, unsigned MinWordSize)
This is a helper function which builds instructions to provide values necessary for partword atomic o...
static bool canUseSizedAtomicCall(unsigned Size, Align Alignment, const DataLayout &DL)
static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr, Value *Loaded, Value *NewVal, Align AddrAlign, AtomicOrdering MemOpOrder, SyncScope::ID SSID, bool IsVolatile, Value *&Success, Value *&NewLoaded, Instruction *MetadataSrc)
static Value * extractMaskedValue(IRBuilderBase &Builder, Value *WideWord, const PartwordMaskValues &PMV)
Expand Atomic static false unsigned getAtomicOpSize(LoadInst *LI)
static void writeUnsupportedAtomicSizeReason(const TargetLowering *TLI, Inst *I, raw_ostream &OS)
static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I)
static Value * insertMaskedValue(IRBuilderBase &Builder, Value *WideWord, Value *Updated, const PartwordMaskValues &PMV)
static void copyMetadataForAtomic(Instruction &Dest, const Instruction &Source)
Copy metadata that's safe to preserve when widening atomics.
static ArrayRef< RTLIB::Libcall > GetRMWLibcall(AtomicRMWInst::BinOp Op)
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool runOnFunction(Function &F, bool PostInlining)
#define DEBUG_TYPE
Module.h This file contains the declarations for the Module class.
static bool isIdempotentRMW(AtomicRMWInst &RMWI)
Return true if and only if the given instruction does not modify the memory location referenced.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
#define T
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file contains the declarations for profiling metadata utility functions.
This file defines the SmallString class.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
void setAlignment(Align Align)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getMergedOrdering() const
Returns a single ordering which is at least as strong as both the success and failure orderings for t...
void setWeak(bool IsWeak)
bool isVolatile() const
Return true if this is a cmpxchg from a volatile memory location.
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
bool isWeak() const
Return true if this cmpxchg may spuriously fail.
void setVolatile(bool V)
Specify whether this is a volatile cmpxchg.
AtomicOrdering getSuccessOrdering() const
Returns the success ordering constraint of this cmpxchg instruction.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this cmpxchg instruction.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
bool isVolatile() const
Return true if this is a RMW on a volatile memory location.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ FAdd
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ FSub
*p = old - v
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMaximumNum
*p = maximumnum(old, v) maximumnum matches the behavior of llvm.maximumnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
@ FMinimumNum
*p = minimumnum(old, v) minimumnum matches the behavior of llvm.minimumnum.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
static LLVM_ABI StringRef getOperationName(BinOp Op)
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
iterator end()
Definition BasicBlock.h:474
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:461
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
reverse_iterator rbegin()
Definition BasicBlock.h:477
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
InstListType::reverse_iterator reverse_iterator
Definition BasicBlock.h:172
reverse_iterator rend()
Definition BasicBlock.h:479
void setAttributes(AttributeList A)
Set the attributes for this call.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:135
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
ArrayRef< unsigned > getIndices() const
unsigned getNumIndices() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:873
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
BasicBlockListType::iterator iterator
Definition Function.h:70
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
AtomicCmpXchgInst * CreateAtomicCmpXchg(Value *Ptr, Value *Cmp, Value *New, MaybeAlign Align, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, SyncScope::ID SSID=SyncScope::System)
Definition IRBuilder.h:1964
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2674
LLVM_ABI CallInst * CreateLifetimeStart(Value *Ptr)
Create a lifetime.start intrinsic.
LLVM_ABI CallInst * CreateLifetimeEnd(Value *Ptr)
Create a lifetime.end intrinsic.
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition IRBuilder.h:1930
CondBrInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1237
UnreachableInst * CreateUnreachable()
Definition IRBuilder.h:1379
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2667
BasicBlock::iterator GetInsertPoint() const
Definition IRBuilder.h:202
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2230
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Definition IRBuilder.h:2269
BasicBlock * GetInsertBlock() const
Definition IRBuilder.h:201
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2371
UncondBrInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1231
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2317
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition IRBuilder.h:2532
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2367
void setIsFPConstrained(bool IsCon)
Enable/Disable use of constrained floating point math.
Definition IRBuilder.h:358
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition IRBuilder.h:1913
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1532
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition IRBuilder.h:2113
LLVMContext & getContext() const
Definition IRBuilder.h:203
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:1591
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2225
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2546
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition IRBuilder.h:1977
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition IRBuilder.h:1949
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1613
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2240
Provides an 'InsertHelper' that calls a user-provided callback after performing the default insertion...
Definition IRBuilder.h:75
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2847
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:354
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
LLVM_ABI void getSyncScopeNames(SmallVectorImpl< StringRef > &SSNs) const
getSyncScopeNames - Populates client supplied SmallVector with synchronization scope names registered...
Tracks which library functions to use for a particular subtarget.
LLVM_ABI RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Return the lowering's selection of implementation call for Call.
Record a mapping from subtarget to LibcallLoweringInfo.
const LibcallLoweringInfo & getLibcallLowering(const TargetSubtargetInfo &Subtarget) const
An instruction for reading from memory.
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
void setVolatile(bool V)
Specify whether this is a volatile load or not.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Metadata node.
Definition Metadata.h:1080
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
LLVMContext & getContext() const
Get the global data context.
Definition Module.h:285
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition Pass.cpp:112
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setVolatile(bool V)
Specify whether this is a volatile store or not.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
virtual Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const
Perform a store-conditional operation to Addr.
EVT getMemValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
virtual void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const
Perform a bit test atomicrmw using a target-specific intrinsic.
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
virtual bool shouldInsertFencesForAtomic(const Instruction *I) const
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
virtual AtomicOrdering atomicOperationOrderAfterFenceSplit(const Instruction *I) const
virtual void emitExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) const
Perform a cmpxchg expansion using a target-specific method.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
virtual Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const
Perform a masked atomicrmw using a target-specific intrinsic.
virtual AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *AI) const
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
virtual Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
virtual void emitExpandAtomicRMW(AtomicRMWInst *AI) const
Perform a atomicrmw expansion using a target-specific way.
virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const
virtual void emitExpandAtomicStore(StoreInst *SI) const
Perform a atomic store using a target-specific way.
virtual AtomicExpansionKind shouldCastAtomicRMWIInIR(AtomicRMWInst *RMWI) const
Returns how the given atomic atomicrmw should be cast by the IR-level AtomicExpand pass.
virtual bool shouldInsertTrailingSeqCstFenceForAtomicStore(const Instruction *I) const
Whether AtomicExpandPass should automatically insert a seq_cst trailing fence without reducing the or...
virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass.
virtual Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const
Perform a masked cmpxchg using a target-specific intrinsic.
virtual bool shouldIssueAtomicLoadForAtomicEmulationLoop(void) const
unsigned getMaxAtomicSizeInBitsSupported() const
Returns the maximum atomic operation size (in bits) supported by the backend.
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
virtual void emitExpandAtomicLoad(LoadInst *LI) const
Perform a atomic load using a target-specific way.
virtual AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const
Returns how the given (atomic) store should be expanded by the IR-level AtomicExpand pass into.
virtual void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const
Perform a atomicrmw which the result is only used by comparison, using a target-specific intrinsic.
virtual AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const
Returns how the given (atomic) store should be cast by the IR-level AtomicExpand pass into.
virtual Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const
virtual AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const
Returns how the given (atomic) load should be cast by the IR-level AtomicExpand pass.
virtual Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const
Inserts in the IR a target-specific intrinsic specifying a fence.
virtual LoadInst * lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const
On some platforms, an AtomicRMW that never actually modifies the value (such as fetch_add of 0) can b...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Target-Independent Code Generator Pass Configuration Options.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:290
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:284
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:201
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:186
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:317
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:549
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
bool use_empty() const
Definition Value.h:346
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
friend class Instruction
Iterator for Instructions in a `BasicBlock.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI bool canInstructionHaveMMRAs(const Instruction &I)
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
bool isReleaseOrStronger(AtomicOrdering AO)
AtomicOrderingCABI toCABI(AtomicOrdering AO)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
Value * buildAtomicRMWValue(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, Value *Loaded, Value *Val)
Emit IR to implement the given atomicrmw operation on values in registers, returning the new value.
AtomicOrdering
Atomic ordering for LLVM's memory model.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
ArrayRef(const T &OneElt) -> ArrayRef< T >
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
bool lowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI)
Convert the given Cmpxchg into primitive load and compare.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool lowerAtomicRMWInst(AtomicRMWInst *RMWI)
Convert the given RMWI into primitive load and stores, assuming that doing so is legal.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI FunctionPass * createAtomicExpandLegacyPass()
AtomicExpandPass - At IR level this pass replace atomic instructions with __atomic_* library calls,...
LLVM_ABI char & AtomicExpandID
AtomicExpandID – Lowers atomic operations in terms of either cmpxchg load-linked/store-conditional lo...
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition ValueTypes.h:420
Matching combinators.
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.