#define DEBUG_TYPE "atomic-expand"

class AtomicExpandImpl {
  // ...
  bool tryExpandAtomicLoad(LoadInst *LI);
  bool expandAtomicLoadToLL(LoadInst *LI);
  bool expandAtomicLoadToCmpXchg(LoadInst *LI);
  // ...
  void expandAtomicOpToLLSC(/* ... */);
  void expandPartwordAtomicRMW(/* ... */);
  // ...
  static Value *insertRMWCmpXchgLoop(/* ... */);
  // ...
  void expandAtomicLoadToLibcall(LoadInst *LI);
  void expandAtomicStoreToLibcall(StoreInst *SI);
  // ...
};
struct ReplacementIRBuilder
    : IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> {
  MDNode *MMRAMD = nullptr;

  // Preserves the DebugLoc from I, preserves still-valid metadata, and
  // enables StrictFP builder mode when appropriate.
  explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
      /* ... */ {
    SetInsertPoint(I);
    // ...
    if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
      this->setIsFPConstrained(true);

    MMRAMD = I->getMetadata(LLVMContext::MD_mmra);
  }

  void addMMRAMD(Instruction *I) {
    if (canInstructionHaveMMRAs(*I))
      I->setMetadata(LLVMContext::MD_mmra, MMRAMD);
  }
};
char AtomicExpandLegacy::ID = 0;

INITIALIZE_PASS_BEGIN(AtomicExpandLegacy, DEBUG_TYPE,
                      "Expand Atomic instructions", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
INITIALIZE_PASS_END(AtomicExpandLegacy, DEBUG_TYPE,
                    "Expand Atomic instructions", false, false)
// Helper functions to retrieve the size of atomic instructions.
static unsigned getAtomicOpSize(LoadInst *LI) {
  const DataLayout &DL = LI->getDataLayout();
  return DL.getTypeStoreSize(LI->getType());
}

static unsigned getAtomicOpSize(StoreInst *SI) {
  const DataLayout &DL = SI->getDataLayout();
  return DL.getTypeStoreSize(SI->getValueOperand()->getType());
}

// Determine if a particular atomic operation has a supported size,
// and is of appropriate alignment, to be passed through for target
// lowering (versus turning into a __atomic libcall).
template <typename Inst>
static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
  unsigned Size = getAtomicOpSize(I);
  Align Alignment = I->getAlign();
  return Alignment >= Size &&
         Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
}
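// A minimal standalone sketch of the same size/alignment gate, with the LLVM
// types stripped out. The 64-bit maximum below is an assumed example value
// for illustration, not something this file fixes:
static bool atomicSizeSupportedSketch(unsigned SizeBytes, unsigned AlignBytes,
                                      unsigned MaxAtomicBits = 64) {
  // Mirrors: Alignment >= Size && Size <= getMaxAtomicSizeInBitsSupported()/8.
  return AlignBytes >= SizeBytes && SizeBytes <= MaxAtomicBits / 8;
}
// atomicSizeSupportedSketch(8, 8)   -> true:  kept for target lowering.
// atomicSizeSupportedSketch(16, 16) -> false: routed to an __atomic_* libcall.
// atomicSizeSupportedSketch(8, 4)   -> false: underaligned, also a libcall.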
bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM) {
  const auto *Subtarget = TM->getSubtargetImpl(F);
  if (!Subtarget->enableAtomicExpand())
    return false;
  TLI = Subtarget->getTargetLowering();
  DL = &F.getDataLayout();

  // Changing control-flow while iterating through it is a bad idea, so gather
  // a list of all atomic instructions before we start.
  SmallVector<Instruction *, 1> AtomicInsts;
  for (Instruction &I : instructions(F))
    if (I.isAtomic() && !isa<FenceInst>(&I))
      AtomicInsts.push_back(&I);
  bool MadeChange = false;
  for (auto *I : AtomicInsts) {
    auto LI = dyn_cast<LoadInst>(I);
    auto SI = dyn_cast<StoreInst>(I);
    auto RMWI = dyn_cast<AtomicRMWInst>(I);
    auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
    assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");
    // If the Size/Alignment is not supported, replace with a libcall.
    if (LI) {
      if (!atomicSizeSupported(TLI, LI)) {
        expandAtomicLoadToLibcall(LI);
        MadeChange = true;
        continue;
      }
    } else if (SI) {
      if (!atomicSizeSupported(TLI, SI)) {
        expandAtomicStoreToLibcall(SI);
        MadeChange = true;
        continue;
      }
    } else if (RMWI) {
      if (!atomicSizeSupported(TLI, RMWI)) {
        expandAtomicRMWToLibcall(RMWI);
        MadeChange = true;
        continue;
      }
    } else if (CASI) {
      if (!atomicSizeSupported(TLI, CASI)) {
        expandAtomicCASToLibcall(CASI);
        MadeChange = true;
        continue;
      }
    }

    if (LI && TLI->shouldCastAtomicLoadInIR(LI) ==
                  TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = LI = convertAtomicLoadToIntegerType(LI);
      MadeChange = true;
    } else if (SI &&
               TLI->shouldCastAtomicStoreInIR(SI) ==
                   TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = SI = convertAtomicStoreToIntegerType(SI);
      MadeChange = true;
    } else if (RMWI &&
               TLI->shouldCastAtomicRMWIInIR(RMWI) ==
                   TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = RMWI = convertAtomicXchgToIntegerType(RMWI);
      MadeChange = true;
    } else if (CASI) {
      if (CASI->getCompareOperand()->getType()->isPointerTy()) {
        // TODO: add a TLI hook to control this so that each target can
        // convert to lowering the original type one at a time.
        I = CASI = convertCmpXchgToIntegerType(CASI);
        MadeChange = true;
      }
    }
    if (TLI->shouldInsertFencesForAtomic(I)) {
      auto FenceOrdering = AtomicOrdering::Monotonic;
      if (LI && isAcquireOrStronger(LI->getOrdering())) {
        FenceOrdering = LI->getOrdering();
        LI->setOrdering(AtomicOrdering::Monotonic);
      } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
        FenceOrdering = SI->getOrdering();
        SI->setOrdering(AtomicOrdering::Monotonic);
      } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
                          isAcquireOrStronger(RMWI->getOrdering()))) {
        FenceOrdering = RMWI->getOrdering();
        RMWI->setOrdering(AtomicOrdering::Monotonic);
      } else if (CASI &&
                 TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
                     TargetLoweringBase::AtomicExpansionKind::None &&
                 (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
                  isAcquireOrStronger(CASI->getSuccessOrdering()) ||
                  isAcquireOrStronger(CASI->getFailureOrdering()))) {
        // If a compare and swap is lowered to LL/SC, we can do smarter fence
        // insertion, with a stronger one on the success path than on the
        // failure path. As a result, fence insertion is directly done by
        // expandAtomicCmpXchg in that case.
        FenceOrdering = CASI->getMergedOrdering();
        CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
        CASI->setFailureOrdering(AtomicOrdering::Monotonic);
      }

      if (FenceOrdering != AtomicOrdering::Monotonic) {
        MadeChange |= bracketInstWithFences(I, FenceOrdering);
      }
    } else if (I->hasAtomicStore() &&
               TLI->shouldInsertTrailingFenceForAtomicStore(I)) {
      auto FenceOrdering = AtomicOrdering::Monotonic;
      if (SI)
        FenceOrdering = SI->getOrdering();
      else if (RMWI)
        FenceOrdering = RMWI->getOrdering();
      else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) !=
                           TargetLoweringBase::AtomicExpansionKind::LLSC)
        // LLSC is handled in expandAtomicCmpXchg().
        FenceOrdering = CASI->getSuccessOrdering();

      ReplacementIRBuilder Builder(I, *DL);
      if (auto TrailingFence =
              TLI->emitTrailingFence(Builder, I, FenceOrdering)) {
        TrailingFence->moveAfter(I);
        MadeChange = true;
      }
    }
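    // For illustration, a user-level analogy of what fence bracketing
    // arranges on targets where shouldInsertFencesForAtomic() is true: the
    // operation is demoted to monotonic (relaxed) and the ordering is
    // re-established with explicit fences. A plain-C++ sketch of the shape,
    // not this pass's actual output:
    //
    //   #include <atomic>
    //   void store_seq_cst(std::atomic<int> &X, int V) {
    //     std::atomic_thread_fence(std::memory_order_seq_cst); // leading
    //     X.store(V, std::memory_order_relaxed);               // "monotonic"
    //     std::atomic_thread_fence(std::memory_order_seq_cst); // trailing
    //   }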
    if (LI)
      MadeChange |= tryExpandAtomicLoad(LI);
    else if (SI)
      MadeChange |= tryExpandAtomicStore(SI);
    else if (RMWI) {
      // There are two different ways of expanding RMW instructions:
      // - into a load if it is idempotent
      // - into a Cmpxchg/LL-SC loop otherwise
      if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
        MadeChange = true;
      } else {
        MadeChange |= tryExpandAtomicRMW(RMWI);
      }
    } else if (CASI)
      MadeChange |= tryExpandAtomicCmpXchg(CASI);
  }
  return MadeChange;
}

bool AtomicExpandLegacy::runOnFunction(Function &F) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;
  auto *TM = &TPC->getTM<TargetMachine>();
  AtomicExpandImpl AE;
  return AE.run(F, TM);
}

FunctionPass *llvm::createAtomicExpandLegacyPass() {
  return new AtomicExpandLegacy();
}

PreservedAnalyses AtomicExpandPass::run(Function &F,
                                        FunctionAnalysisManager &AM) {
  AtomicExpandImpl AE;
  bool Changed = AE.run(F, TM);
  if (!Changed)
    return PreservedAnalyses::all();
  return PreservedAnalyses::none();
}
bool AtomicExpandImpl::bracketInstWithFences(Instruction *I,
                                             AtomicOrdering Order) {
  ReplacementIRBuilder Builder(I, *DL);

  auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);

  auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
  // We have a guard here because not every atomic operation generates a
  // trailing fence.
  if (TrailingFence)
    TrailingFence->moveAfter(I);

  return (LeadingFence || TrailingFence);
}

/// Get the iX type with the same bitwidth as T.
IntegerType *
AtomicExpandImpl::getCorrespondingIntegerType(Type *T, const DataLayout &DL) {
  EVT VT = TLI->getMemValueType(DL, T);
  unsigned BitWidth = VT.getStoreSizeInBits();
  assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
  return IntegerType::get(T->getContext(), BitWidth);
}
/// Convert an atomic load of a non-integral type to an integer load of the
/// equivalent bitwidth. See the function comment on
/// convertAtomicStoreToIntegerType for background.
LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) {
  auto *M = LI->getModule();
  Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());

  ReplacementIRBuilder Builder(LI, *DL);

  Value *Addr = LI->getPointerOperand();

  auto *NewLI = Builder.CreateLoad(NewTy, Addr);
  NewLI->setAlignment(LI->getAlign());
  NewLI->setVolatile(LI->isVolatile());
  NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
  LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");

  Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
  LI->replaceAllUsesWith(NewVal);
  LI->eraseFromParent();
  return NewLI;
}
AtomicRMWInst *
AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
  auto *M = RMWI->getModule();
  Type *NewTy =
      getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());

  ReplacementIRBuilder Builder(RMWI, *DL);

  Value *Addr = RMWI->getPointerOperand();
  Value *Val = RMWI->getValOperand();
  Value *NewVal = Val->getType()->isPointerTy()
                      ? Builder.CreatePtrToInt(Val, NewTy)
                      : Builder.CreateBitCast(Val, NewTy);

  auto *NewRMWI = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Addr, NewVal,
                                          RMWI->getAlign(), RMWI->getOrdering(),
                                          RMWI->getSyncScopeID());
  NewRMWI->setVolatile(RMWI->isVolatile());
  LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");

  Value *NewRVal = RMWI->getType()->isPointerTy()
                       ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
                       : Builder.CreateBitCast(NewRMWI, RMWI->getType());
  RMWI->replaceAllUsesWith(NewRVal);
  RMWI->eraseFromParent();
  return NewRMWI;
}
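// An illustrative source-level analogy of this integer-cast trick, assuming
// C++20 for std::bit_cast (a sketch, not this pass's output): an xchg on a
// float is performed as a 32-bit integer xchg with bitcasts on either side.
#include <atomic>
#include <bit>
#include <cstdint>

inline float xchg_float_sketch(std::atomic<uint32_t> &Slot, float V) {
  uint32_t Bits = std::bit_cast<uint32_t>(V); // CreateBitCast(Val, NewTy)
  uint32_t Old = Slot.exchange(Bits);         // the integer-typed xchg
  return std::bit_cast<float>(Old);           // cast the result back
}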
bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
  switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC:
    expandAtomicOpToLLSC(
        LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
        LI->getOrdering(),
        [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
    return true;
  case TargetLoweringBase::AtomicExpansionKind::LLOnly:
    return expandAtomicLoadToLL(LI);
  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
    return expandAtomicLoadToCmpXchg(LI);
  // ...
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
  }
}

bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
  switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::Expand:
    expandAtomicStore(SI);
    return true;
  // ...
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicStore");
  }
}
bool AtomicExpandImpl::expandAtomicLoadToLL(LoadInst *LI) {
  ReplacementIRBuilder Builder(LI, *DL);

  // On some architectures, load-linked instructions are atomic for larger
  // sizes than normal loads. For example, the only 64-bit load guaranteed
  // to be single-copy atomic by ARM is an ldrexd.
  Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
                                   LI->getPointerOperand(), LI->getOrdering());
  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);

  LI->replaceAllUsesWith(Val);
  LI->eraseFromParent();

  return true;
}

bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
  ReplacementIRBuilder Builder(LI, *DL);
  AtomicOrdering Order = LI->getOrdering();
  if (Order == AtomicOrdering::Unordered)
    Order = AtomicOrdering::Monotonic;

  Value *Addr = LI->getPointerOperand();
  Type *Ty = LI->getType();
  Constant *DummyVal = Constant::getNullValue(Ty);

  // The load is expanded as cmpxchg(addr, 0, 0): whether or not memory holds
  // zero, the returned old value is exactly the load result.
  Value *Pair = Builder.CreateAtomicCmpXchg(
      Addr, DummyVal, DummyVal, LI->getAlign(), Order,
      AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
  Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");

  LI->replaceAllUsesWith(Loaded);
  LI->eraseFromParent();

  return true;
}
/// Convert an atomic store of a non-integral type to an integer store of the
/// equivalent bitwidth, so only integer stores need native atomic support
/// from the backend.
StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
  ReplacementIRBuilder Builder(SI, *DL);
  auto *M = SI->getModule();
  Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
                                            M->getDataLayout());
  Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);

  Value *Addr = SI->getPointerOperand();

  StoreInst *NewSI = Builder.CreateStore(NewVal, Addr);
  NewSI->setAlignment(SI->getAlign());
  NewSI->setVolatile(SI->isVolatile());
  NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
  LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
  SI->eraseFromParent();
  return NewSI;
}
void AtomicExpandImpl::expandAtomicStore(StoreInst *SI) {
  // This function is only called on atomic stores that are too large to be
  // atomic if implemented as a native store, so we replace them by an atomic
  // swap (xchg) whose result is unused, then lower that swap as usual.
  ReplacementIRBuilder Builder(SI, *DL);
  AtomicOrdering Ordering = SI->getOrdering();
  assert(Ordering != AtomicOrdering::NotAtomic);
  AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
                                   ? AtomicOrdering::Monotonic
                                   : Ordering;
  AtomicRMWInst *AI = Builder.CreateAtomicRMW(
      AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
      SI->getAlign(), RMWOrdering);
  SI->eraseFromParent();

  // Now we have an appropriate swap instruction, lower it as usual.
  tryExpandAtomicRMW(AI);
}
static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
                                 Value *Loaded, Value *NewVal, Align AddrAlign,
                                 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
                                 Value *&Success, Value *&NewLoaded) {
  // ...
  Value *Pair = Builder.CreateAtomicCmpXchg(
      Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
      AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
  Success = Builder.CreateExtractValue(Pair, 1, "success");
  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
  // ...
}
bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
  switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
    unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
    unsigned ValueSize = getAtomicOpSize(AI);
    if (ValueSize < MinCASSize) {
      expandPartwordAtomicRMW(AI,
                              TargetLoweringBase::AtomicExpansionKind::LLSC);
    } else {
      // ... full-word case: expandAtomicOpToLLSC with buildAtomicRMWValue as
      // the PerformOp callback.
    }
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
    unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
    unsigned ValueSize = getAtomicOpSize(AI);
    if (ValueSize < MinCASSize) {
      expandPartwordAtomicRMW(AI,
                              TargetLoweringBase::AtomicExpansionKind::CmpXChg);
    } else {
      // ... expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun), after
      // emitting an optimization remark:
      //   "A compare and swap loop was generated for an atomic "
      //   << AtomicRMWInst::getOperationName(AI->getOperation())
      //   << " operation at " << MemScope << " memory scope"
    }
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
    unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
    unsigned ValueSize = getAtomicOpSize(AI);
    if (ValueSize < MinCASSize) {
      // Widen And/Or/Xor and give the target another chance at expanding it.
      AtomicRMWInst::BinOp Op = AI->getOperation();
      if (Op == AtomicRMWInst::And || Op == AtomicRMWInst::Or ||
          Op == AtomicRMWInst::Xor) {
        tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
        return true;
      }
    }
    expandAtomicRMWToMaskedIntrinsic(AI);
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic:
    TLI->emitBitTestAtomicRMWIntrinsic(AI);
    return true;
  case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic:
    TLI->emitCmpArithAtomicRMWIntrinsic(AI);
    return true;
  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
    return lowerAtomicRMWInst(AI);
  case TargetLoweringBase::AtomicExpansionKind::Expand:
    TLI->emitExpandAtomicRMW(AI);
    return true;
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
  }
}
/// Result values from createMaskInstrs helper.
struct PartwordMaskValues {
  // These three fields are guaranteed to be set by createMaskInstrs.
  Type *WordType = nullptr;
  Type *ValueType = nullptr;
  Type *IntValueType = nullptr;
  Value *AlignedAddr = nullptr;
  Align AlignedAddrAlignment;
  // The remaining fields can be null.
  Value *ShiftAmt = nullptr;
  Value *Mask = nullptr;
  Value *Inv_Mask = nullptr;
};

LLVM_ATTRIBUTE_UNUSED
raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
  auto PrintObj = [&O](auto *V) {
    if (V)
      O << *V;
    else
      O << "nullptr";
    O << '\n';
  };
  O << "PartwordMaskValues {\n";
  O << "  WordType: ";
  PrintObj(PMV.WordType);
  O << "  ValueType: ";
  PrintObj(PMV.ValueType);
  O << "  AlignedAddr: ";
  PrintObj(PMV.AlignedAddr);
  O << "  AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
  O << "  ShiftAmt: ";
  PrintObj(PMV.ShiftAmt);
  O << "  Mask: ";
  PrintObj(PMV.Mask);
  O << "  Inv_Mask: ";
  PrintObj(PMV.Inv_Mask);
  O << "}\n";
  return O;
}
/// This is a helper function which builds instructions to provide values
/// necessary for partword atomic operations. It takes an incoming address,
/// Addr, and ValueType, and constructs the aligned address, shift amounts,
/// and masks needed to work with a larger value of size MinWordSize.
static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
                                           Instruction *I, Type *ValueType,
                                           Value *Addr, Align AddrAlign,
                                           unsigned MinWordSize) {
  PartwordMaskValues PMV;

  Module *M = I->getModule();
  LLVMContext &Ctx = M->getContext();
  const DataLayout &DL = M->getDataLayout();
  unsigned ValueSize = DL.getTypeStoreSize(ValueType);

  PMV.ValueType = PMV.IntValueType = ValueType;
  if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy())
    PMV.IntValueType =
        Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());

  PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
                                         : ValueType;
  if (PMV.ValueType == PMV.WordType) {
    // The value already fills a whole word; no masking or shifting needed.
    PMV.AlignedAddr = Addr;
    PMV.AlignedAddrAlignment = AddrAlign;
    PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
    PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
    return PMV;
  }

  PMV.AlignedAddrAlignment = Align(MinWordSize);

  assert(ValueSize < MinWordSize);

  PointerType *PtrTy = cast<PointerType>(Addr->getType());
  IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
  Value *PtrLSB;

  if (AddrAlign < MinWordSize) {
    PMV.AlignedAddr = Builder.CreateIntrinsic(
        Intrinsic::ptrmask, {PtrTy, IntTy},
        {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
        "AlignedAddr");

    Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
    PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
  } else {
    // If the alignment is at least the word size, the low bits are known 0.
    PMV.AlignedAddr = Addr;
    PtrLSB = ConstantInt::getNullValue(IntTy);
  }

  if (DL.isLittleEndian()) {
    // turn bytes into bits
    PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
  } else {
    // turn bytes into bits, and count from the other side.
    PMV.ShiftAmt = Builder.CreateShl(
        Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
  }
  PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
  PMV.Mask = Builder.CreateShl(
      ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
      "Mask");

  PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");

  return PMV;
}
static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
                                 const PartwordMaskValues &PMV) {
  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
  if (PMV.WordType == PMV.ValueType)
    return WideWord;

  Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
  Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
  return Builder.CreateBitCast(Trunc, PMV.ValueType);
}

static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
                                Value *Updated,
                                const PartwordMaskValues &PMV) {
  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
  assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
  if (PMV.WordType == PMV.ValueType)
    return Updated;

  Updated = Builder.CreateBitCast(Updated, PMV.IntValueType);

  Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
  Value *Shift =
      Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
  Value *WideWordMasked =
      Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
  return Builder.CreateOr(WideWordMasked, Shift, "inserted");
}
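// The same extract/insert pair as portable bit twiddling (an illustrative
// sketch for the 1-byte-in-4-byte case, not LLVM API):
#include <cstdint>

inline uint8_t extractPartwordSketch(uint32_t WideWord, uint32_t ShiftAmt) {
  return uint8_t(WideWord >> ShiftAmt);             // LShr + Trunc
}

inline uint32_t insertPartwordSketch(uint32_t WideWord, uint8_t Updated,
                                     uint32_t ShiftAmt, uint32_t Inv_Mask) {
  uint32_t Shifted = uint32_t(Updated) << ShiftAmt; // ZExt + Shl
  return (WideWord & Inv_Mask) | Shifted;           // And(Inv_Mask) + Or
}
// The pair round-trips: inserting the byte just extracted reproduces the
// original wide word.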
/// Emit IR to implement a masked version of a given atomicrmw operation:
/// only the bits under the Mask should be affected.
static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
                                    IRBuilderBase &Builder, Value *Loaded,
                                    Value *Shifted_Inc, Value *Inc,
                                    const PartwordMaskValues &PMV) {
  switch (Op) {
  case AtomicRMWInst::Xchg: {
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
    return FinalVal;
  }
  // ... Or/Xor/And can operate on the wide word directly; Add, Sub, Nand and
  // the min/max family compute the new partword value, mask it, and splice
  // it back in: FinalVal = CreateOr(Loaded_MaskOut, NewVal_Masked).
  default:
    llvm_unreachable("Unknown atomic op");
  }
}

void AtomicExpandImpl::expandPartwordAtomicRMW(
    AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
  // Widen And/Or/Xor and give the target another chance at expanding it.
  AtomicRMWInst::BinOp Op = AI->getOperation();
  if (Op == AtomicRMWInst::And || Op == AtomicRMWInst::Or ||
      Op == AtomicRMWInst::Xor) {
    tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
    return;
  }
  AtomicOrdering MemOpOrder = AI->getOrdering();
  SyncScope::ID SSID = AI->getSyncScopeID();

  ReplacementIRBuilder Builder(AI, *DL);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  Value *ValOperand_Shifted = nullptr;
  if (Op == AtomicRMWInst::Xchg || Op == AtomicRMWInst::Add ||
      Op == AtomicRMWInst::Sub || Op == AtomicRMWInst::Nand) {
    Value *ValOp = Builder.CreateBitCast(AI->getValOperand(), PMV.IntValueType);
    ValOperand_Shifted =
        Builder.CreateShl(Builder.CreateZExt(ValOp, PMV.WordType), PMV.ShiftAmt,
                          "ValOperand_Shifted");
  }

  auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
    return performMaskedAtomicOp(Op, Builder, Loaded, ValOperand_Shifted,
                                 AI->getValOperand(), PMV);
  };

  Value *OldResult;
  if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
    OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
                                     PMV.AlignedAddrAlignment, MemOpOrder, SSID,
                                     PerformPartwordOp, createCmpXchgInstFun);
  } else {
    assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
    OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
                                  PMV.AlignedAddrAlignment, MemOpOrder,
                                  PerformPartwordOp);
  }

  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
}
/// Copy metadata that's safe to preserve when widening atomics.
static void copyMetadataForAtomic(Instruction &Dest,
                                  const Instruction &Source) {
  SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
  Source.getAllMetadata(MD);
  LLVMContext &Ctx = Dest.getContext();

  for (auto [ID, N] : MD) {
    switch (ID) {
    case LLVMContext::MD_dbg:
    case LLVMContext::MD_tbaa:
    case LLVMContext::MD_tbaa_struct:
    case LLVMContext::MD_alias_scope:
    case LLVMContext::MD_noalias:
    case LLVMContext::MD_access_group:
    case LLVMContext::MD_mmra:
      Dest.setMetadata(ID, N);
      break;
    default:
      if (ID == Ctx.getMDKindID("amdgpu.no.remote.memory"))
        Dest.setMetadata(ID, N);
      else if (ID == Ctx.getMDKindID("amdgpu.no.fine.grained.memory"))
        Dest.setMetadata(ID, N);
      break;
    }
  }
}

AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
  ReplacementIRBuilder Builder(AI, *DL);
  AtomicRMWInst::BinOp Op = AI->getOperation();

  assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
          Op == AtomicRMWInst::And) &&
         "Unable to widen operation");

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  Value *ValOperand_Shifted =
      Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
                        PMV.ShiftAmt, "ValOperand_Shifted");

  Value *NewOperand;

  if (Op == AtomicRMWInst::And)
    // For 'and', bits outside the partword must stay set, so widen the
    // operand with the inverted mask rather than with zeroes.
    NewOperand =
        Builder.CreateOr(ValOperand_Shifted, PMV.Inv_Mask, "AndOperand");
  else
    NewOperand = ValOperand_Shifted;

  AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
      Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
      AI->getOrdering(), AI->getSyncScopeID());

  copyMetadataForAtomic(*NewAI, *AI);

  Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
  return NewAI;
}
bool AtomicExpandImpl::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
  // Expand a sub-word cmpxchg to a word-sized cmpxchg placed in a loop: the
  // wide cmpxchg can fail "spuriously" when only the bits outside the
  // partword changed, in which case we refresh the expected wide word and
  // try again.
  ReplacementIRBuilder Builder(CI, *DL);

  // ... split the block into setup/loop/failure/end blocks; the split adds a
  // branch at the end of BB (to the wrong place), so remove it:
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
                       CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // Shift the incoming values over, into the right location in the word.
  Value *NewVal_Shifted =
      Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
  Value *Cmp_Shifted =
      Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);

  // Load the entire current word and mask the partword bits out.
  LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
  Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
  // ...

  // partword.cmpxchg.loop:
  PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
  Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);

  // Or the expected and new partword values into place in the loaded word.
  Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
  Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
  AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
      PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
      CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID());
  // ... on failure, check whether only the bits outside the partword changed;
  // if so, loop again with the refreshed expected word:
  Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
  // ...
  return true;
}
void AtomicExpandImpl::expandAtomicOpToLLSC(
    Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
  ReplacementIRBuilder Builder(I, *DL);
  Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
                                    MemOpOrder, PerformOp);

  I->replaceAllUsesWith(Loaded);
  I->eraseFromParent();
}
void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
  ReplacementIRBuilder Builder(AI, *DL);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // The value operand must be sign-extended for signed min/max so that the
  // target's signed comparison instructions can be used. Otherwise zero-ext.
  Instruction::CastOps CastOp = Instruction::ZExt;
  AtomicRMWInst::BinOp RMWOp = AI->getOperation();
  if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
    CastOp = Instruction::SExt;

  Value *ValOperand_Shifted = Builder.CreateShl(
      Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
      PMV.ShiftAmt, "ValOperand_Shifted");
  Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
      Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
      AI->getOrdering());
  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
}

void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic(
    AtomicCmpXchgInst *CI) {
  ReplacementIRBuilder Builder(CI, *DL);

  PartwordMaskValues PMV = createMaskInstrs(
      Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
      CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  Value *CmpVal_Shifted = Builder.CreateShl(
      Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
      "CmpVal_Shifted");
  Value *NewVal_Shifted = Builder.CreateShl(
      Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
      "NewVal_Shifted");
  Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
      Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
      CI->getMergedOrdering());
  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
  Value *Res = PoisonValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
  Value *Success = Builder.CreateICmpEQ(
      CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
  Res = Builder.CreateInsertValue(Res, Success, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
}
Value *AtomicExpandImpl::insertRMWLLSCLoop(
    IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  assert(AddrAlign >= F->getDataLayout().getTypeStoreSize(ResultTy) &&
         "Expected at least natural alignment at this point.");

  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);

  Value *NewVal = PerformOp(Builder, Loaded);

  Value *StoreSuccess =
      TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
  Value *TryAgain = Builder.CreateICmpNE(
      StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return Loaded;
}
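// The CFG built above, flattened into pseudo-C++ for illustration.
// load_linked() and store_conditional() are hypothetical stand-ins for the
// target hooks emitLoadLinked()/emitStoreConditional(); this is a sketch of
// the loop shape, not the emitted IR:
//
//   T atomicrmw_llsc_sketch(T *Addr) {
//     T Loaded, NewVal;
//     do {                                  // atomicrmw.start:
//       Loaded = load_linked(Addr);         //   %loaded = @load_linked(%addr)
//       NewVal = PerformOp(Loaded);         //   %new = some_op %loaded, %incr
//     } while (!store_conditional(NewVal, Addr)); // retry if the SC failed
//     return Loaded;                        // atomicrmw.end: the old value
//   }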
AtomicCmpXchgInst *
AtomicExpandImpl::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
  auto *M = CI->getModule();
  Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
                                            M->getDataLayout());

  ReplacementIRBuilder Builder(CI, *DL);

  Value *Addr = CI->getPointerOperand();

  Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
  Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);

  auto *NewCI = Builder.CreateAtomicCmpXchg(
      Addr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
      CI->getFailureOrdering(), CI->getSyncScopeID());
  NewCI->setVolatile(CI->isVolatile());
  NewCI->setWeak(CI->isWeak());
  LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");

  // Extract the {old value, success} pair, convert the old value back to a
  // pointer, and rebuild the aggregate result.
  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
  Value *Succ = Builder.CreateExtractValue(NewCI, 1);
  OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
  Value *Res = PoisonValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, OldVal, 0);
  Res = Builder.CreateInsertValue(Res, Succ, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return NewCI;
}
bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
  AtomicOrdering FailureOrder = CI->getFailureOrdering();
  Value *Addr = CI->getPointerOperand();
  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();
  // If shouldInsertFencesForAtomic() returns true, then the target does not
  // want to deal with memory orders, and emitLeading/TrailingFence should
  // take care of everything. Otherwise, those hooks are no-ops and we should
  // preserve the ordering.
  bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
  AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
                                  ? AtomicOrdering::Monotonic
                                  : CI->getMergedOrdering();

  // In implementations which use a barrier to achieve release semantics, we
  // can delay emitting this barrier until we know a store is actually going
  // to be attempted.
  bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
                           SuccessOrder != AtomicOrdering::Monotonic &&
                           SuccessOrder != AtomicOrdering::Acquire &&
                           !F->hasMinSize();

  // There's no overhead for sinking the release barrier in a weak cmpxchg, so
  // we can do it even on minsize.
  bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();

  // Create the CFG (the source documents the full basic-block diagram here).
  BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
  auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
  auto ReleasedLoadBB =
      BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
  auto TryStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
  auto ReleasingStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
  auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);

  ReplacementIRBuilder Builder(CI, *DL);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we might want a fence too. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
                       CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
  Builder.CreateBr(StartBB);

  // cmpxchg.start: load linked and compare against the expected partword.
  Builder.SetInsertPoint(StartBB);
  Value *UnreleasedLoad =
      TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
  Value *UnreleasedLoadExtract =
      extractMaskedValue(Builder, UnreleasedLoad, PMV);
  Value *ShouldStore = Builder.CreateICmpEQ(
      UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");
  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);

  Builder.SetInsertPoint(ReleasingStoreBB);
  if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(TryStoreBB);

  Builder.SetInsertPoint(TryStoreBB);
  PHINode *LoadedTryStore =
      Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
  LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
  Value *NewValueInsert =
      insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
  Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
                                                  PMV.AlignedAddr, MemOpOrder);
  // ... a weak cmpxchg is allowed to fail here; a strong one retries:
  BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
  Builder.CreateCondBr(StoreSuccess, SuccessBB,
                       CI->isWeak() ? FailureBB : RetryBB);

  Builder.SetInsertPoint(ReleasedLoadBB);
  Value *SecondLoad;
  if (HasReleasedLoadBB) {
    SecondLoad =
        TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
    // ... recompute ShouldStore against SecondLoad, then:
    Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
    LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
  } else
    Builder.CreateUnreachable();

  Builder.SetInsertPoint(SuccessBB);
  if (ShouldInsertFencesForAtomic ||
      TLI->shouldInsertTrailingFenceForAtomicStore(CI))
    TLI->emitTrailingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(ExitBB);

  Builder.SetInsertPoint(NoStoreBB);
  PHINode *LoadedNoStore =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
  LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
  if (HasReleasedLoadBB)
    LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);

  // In the failing case, where we don't execute the store-conditional, the
  // target might want to balance out the load-linked with a dedicated
  // instruction (e.g., on ARM, clearing the exclusive monitor).
  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
  Builder.CreateBr(FailureBB);

  Builder.SetInsertPoint(FailureBB);
  PHINode *LoadedFailure =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
  LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
  if (CI->isWeak())
    LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
  if (ShouldInsertFencesForAtomic)
    TLI->emitTrailingFence(Builder, CI, FailureOrder);
  Builder.CreateBr(ExitBB);

  // Exit: PHI together the loaded values and the success flag, then rewrite
  // users of the original cmpxchg.
  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  PHINode *LoadedExit =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
  LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
  LoadedExit->addIncoming(LoadedFailure, FailureBB);
  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);

  Value *LoadedFull = LoadedExit;
  Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);

  SmallVector<ExtractValueInst *, 2> PrunedInsts;
  for (auto *User : CI->users()) {
    auto *EV = dyn_cast<ExtractValueInst>(User);
    if (!EV)
      continue;
    assert(EV->getNumIndices() == 1 && *EV->idx_begin() <= 1 &&
           "weird extraction from { iN, i1 }");
    if (*EV->idx_begin() == 0)
      EV->replaceAllUsesWith(Loaded);
    else
      EV->replaceAllUsesWith(Success);
    PrunedInsts.push_back(EV);
  }
  for (auto *EV : PrunedInsts)
    EV->eraseFromParent();
  // ...
  return true;
}
bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
  auto *C = dyn_cast<ConstantInt>(RMWI->getValOperand());
  if (!C)
    return false;

  switch (RMWI->getOperation()) {
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
    return C->isZero();     // x op 0 == x
  case AtomicRMWInst::And:
    return C->isMinusOne(); // x & ~0 == x
  default:
    return false;
  }
}

bool AtomicExpandImpl::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
  if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
    tryExpandAtomicLoad(ResultingLoad);
    return true;
  }
  return false;
}
Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
    IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder, SyncScope::ID SSID,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
    CreateCmpXchgInstFun CreateCmpXchg) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
  Builder.CreateBr(LoopBB);

  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
  Loaded->addIncoming(InitLoaded, BB);

  Value *NewVal = PerformOp(Builder, Loaded);

  Value *NewLoaded = nullptr;
  Value *Success = nullptr;

  CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
                MemOpOrder == AtomicOrdering::Unordered
                    ? AtomicOrdering::Monotonic
                    : MemOpOrder,
                SSID, Success, NewLoaded);

  Loaded->addIncoming(NewLoaded, LoopBB);

  Builder.CreateCondBr(Success, ExitBB, LoopBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return NewLoaded;
}

bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
  unsigned ValueSize = getAtomicOpSize(CI);

  switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
  case TargetLoweringBase::AtomicExpansionKind::None:
    if (ValueSize < MinCASSize)
      return expandPartwordCmpXchg(CI);
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC:
    return expandAtomicCmpXchg(CI);
  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
    expandAtomicCmpXchgToMaskedIntrinsic(CI);
    return true;
  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
    return lowerAtomicCmpXchgInst(CI);
  }
}

bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
                                    CreateCmpXchgInstFun CreateCmpXchg) {
  ReplacementIRBuilder Builder(AI, AI->getDataLayout());
  Builder.setIsFPConstrained(
      AI->getFunction()->hasFnAttribute(Attribute::StrictFP));

  Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
      Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
      AI->getOrdering(), AI->getSyncScopeID(),
      [&](IRBuilderBase &Builder, Value *Loaded) {
        return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
                                   AI->getValOperand());
      },
      CreateCmpXchg);

  AI->replaceAllUsesWith(Loaded);
  AI->eraseFromParent();
  return true;
}
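// A user-level analogy of the loop this expansion produces, written against
// std::atomic rather than IR (a sketch of the shape, not the pass's output):
#include <atomic>
#include <cstdint>

inline uint32_t fetch_or_via_cas_sketch(std::atomic<uint32_t> &A,
                                        uint32_t Inc) {
  uint32_t Loaded = A.load(std::memory_order_relaxed); // %init_loaded
  uint32_t NewVal;
  do {
    NewVal = Loaded | Inc; // PerformOp / buildAtomicRMWValue
    // On failure, compare_exchange_weak refreshes Loaded, playing the role
    // of the PHI that feeds %new_loaded back around the loop.
  } while (!A.compare_exchange_weak(Loaded, NewVal));
  return Loaded; // the old value, like the atomicrmw result
}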
static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
                                  const DataLayout &DL) {
  // "LargestSize" approximates the largest integer a sized libcall can take
  // in the target's C ABI: 16 bytes (i128) on 64-bit platforms, otherwise 8.
  unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
  return Alignment >= Size &&
         (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
         Size <= LargestSize;
}
void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_LOAD,   RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
      RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
  unsigned Size = getAtomicOpSize(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
      I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
  if (!expanded)
    report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load");
}

void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_STORE,   RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
      RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
  unsigned Size = getAtomicOpSize(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
      nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
  if (!expanded)
    report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store");
}

void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_COMPARE_EXCHANGE,   RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
      RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
      RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
  unsigned Size = getAtomicOpSize(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
      I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
      Libcalls);
  if (!expanded)
    report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS");
}
static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
  // Entry [0] is the generic (unsized) libcall, when one exists; entries
  // [1..5] are the sized variants for 1, 2, 4, 8, and 16 bytes. Only xchg
  // has a generic form; the fetch_* calls are size-specialized only.
  static const RTLIB::Libcall LibcallsXchg[6] = {
      RTLIB::ATOMIC_EXCHANGE,   RTLIB::ATOMIC_EXCHANGE_1,
      RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
      RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
  static const RTLIB::Libcall LibcallsAdd[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_ADD_1,
      RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
      RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
  static const RTLIB::Libcall LibcallsSub[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_SUB_1,
      RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
      RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
  static const RTLIB::Libcall LibcallsAnd[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_AND_1,
      RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
      RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
  static const RTLIB::Libcall LibcallsOr[6] = {
      RTLIB::UNKNOWN_LIBCALL,   RTLIB::ATOMIC_FETCH_OR_1,
      RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
      RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
  static const RTLIB::Libcall LibcallsXor[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_XOR_1,
      RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
      RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
  static const RTLIB::Libcall LibcallsNand[6] = {
      RTLIB::UNKNOWN_LIBCALL,     RTLIB::ATOMIC_FETCH_NAND_1,
      RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
      RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};

  switch (Op) {
  case AtomicRMWInst::Xchg:
    return ArrayRef(LibcallsXchg);
  case AtomicRMWInst::Add:
    return ArrayRef(LibcallsAdd);
  // ... Sub/And/Or/Xor/Nand likewise; min/max and the FP operations have no
  // libcalls, so an empty ArrayRef is returned and the caller falls back to
  // a CAS loop.
  default:
    return {};
  }
}
void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
  ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
  unsigned Size = getAtomicOpSize(I);

  bool Success = false;
  if (!Libcalls.empty())
    Success = expandAtomicOpToLibcall(
        I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
        nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);

  // The expansion failed: either there were no libcalls at all for the
  // operation (min/max), or there were only size-specialized libcalls
  // (add/sub/etc) and we needed a generic. So, expand to a CAS libcall, via
  // a CAS loop, instead.
  if (!Success) {
    expandAtomicRMWToCmpXchg(
        I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
                  Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
                  SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) {
          // Create the CAS instruction normally, but then we'll replace it
          // with a call to the __atomic_compare_exchange libcall later.
          AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
              Addr, Loaded, NewVal, Alignment, MemOpOrder,
              AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
          Success = Builder.CreateExtractValue(Pair, 1, "success");
          NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");

          expandAtomicCASToLibcall(Pair);
        });
  }
}
// A helper routine for customizing the behavior of the atomic libcall
// expansion: lowers the given operation to one of the libcalls in 'Libcalls'
// (a generic entry plus sized variants), passing operands either directly or
// through temporary allocas, as the chosen signature requires. (The C
// prototypes being targeted are listed after this function.)
bool AtomicExpandImpl::expandAtomicOpToLibcall(
    Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
    Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
    AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
  assert(Libcalls.size() == 6);

  LLVMContext &Ctx = I->getContext();
  Module *M = I->getModule();
  const DataLayout &DL = M->getDataLayout();
  IRBuilder<> Builder(I);
  IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());

  bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
  Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);

  const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);

  ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
  Constant *OrderingVal =
      ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
  Constant *Ordering2Val = nullptr;
  if (CASExpected)
    Ordering2Val =
        ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
  bool HasResult = I->getType() != Type::getVoidTy(Ctx);

  // Pick the sized libcall matching Size, or fall back to the generic one.
  RTLIB::Libcall RTLibType;
  if (UseSizedLibcall) {
    switch (Size) {
    case 1: RTLibType = Libcalls[1]; break;
    case 2: RTLibType = Libcalls[2]; break;
    case 4: RTLibType = Libcalls[3]; break;
    case 8: RTLibType = Libcalls[4]; break;
    case 16: RTLibType = Libcalls[5]; break;
    }
  } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
    RTLibType = Libcalls[0];
  } else {
    // Can't use a sized function, and there's no generic for this operation,
    // so give up.
    return false;
  }

  if (!TLI->getLibcallName(RTLibType)) {
    // This target does not implement the requested atomic libcall; give up.
    return false;
  }

  AllocaInst *AllocaCASExpected = nullptr;
  AllocaInst *AllocaValue = nullptr;
  AllocaInst *AllocaResult = nullptr;

  Type *ResultTy;
  SmallVector<Value *, 6> Args;
  AttributeList Attr;

  // 'size' argument (generic calls only).
  if (!UseSizedLibcall)
    Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));

  // 'ptr' argument.
  Value *PtrVal = PointerOperand;
  Args.push_back(PtrVal);

  // 'expected' argument, if present: spilled to an alloca.
  if (CASExpected) {
    AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
    AllocaCASExpected->setAlignment(AllocaAlignment);
    Builder.CreateLifetimeStart(AllocaCASExpected, SizeVal64);
    Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
    Args.push_back(AllocaCASExpected);
  }

  // 'val' argument ('desired' for cas), if present: by value for sized
  // calls, through an alloca for generic ones.
  if (ValueOperand) {
    if (UseSizedLibcall) {
      Value *IntValue = Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
      Args.push_back(IntValue);
    } else {
      AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
      AllocaValue->setAlignment(AllocaAlignment);
      Builder.CreateLifetimeStart(AllocaValue, SizeVal64);
      Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
      Args.push_back(AllocaValue);
    }
  }

  // 'ret' argument (generic calls with a result only).
  if (!CASExpected && HasResult && !UseSizedLibcall) {
    AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
    AllocaResult->setAlignment(AllocaAlignment);
    Builder.CreateLifetimeStart(AllocaResult, SizeVal64);
    Args.push_back(AllocaResult);
  }

  // 'ordering' ('success_order' for cas) argument.
  Args.push_back(OrderingVal);
  // 'failure_order' argument, if present.
  if (Ordering2Val)
    Args.push_back(Ordering2Val);

  // Now, the return type.
  if (CASExpected) {
    ResultTy = Type::getInt1Ty(Ctx);
    Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
  } else if (HasResult && UseSizedLibcall)
    ResultTy = SizedIntTy;
  else
    ResultTy = Type::getVoidTy(Ctx);

  // Done with setting up arguments and return types, create the call:
  SmallVector<Type *, 6> ArgTys;
  for (Value *Arg : Args)
    ArgTys.push_back(Arg->getType());
  FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
  FunctionCallee LibcallFn =
      M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
  CallInst *Call = Builder.CreateCall(LibcallFn, Args);
  Call->setAttributes(Attr);
  Value *Result = Call;

  // And then, extract the results...
  if (ValueOperand && !UseSizedLibcall)
    Builder.CreateLifetimeEnd(AllocaValue, SizeVal64);

  if (CASExpected) {
    // The final CAS result is {load of the 'expected' alloca, bool result
    // from the call}.
    Type *FinalResultTy = I->getType();
    Value *V = PoisonValue::get(FinalResultTy);
    Value *ExpectedOut = Builder.CreateAlignedLoad(
        CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
    Builder.CreateLifetimeEnd(AllocaCASExpected, SizeVal64);
    V = Builder.CreateInsertValue(V, ExpectedOut, 0);
    V = Builder.CreateInsertValue(V, Result, 1);
    I->replaceAllUsesWith(V);
  } else if (HasResult) {
    Value *V;
    if (UseSizedLibcall)
      V = Builder.CreateBitOrPointerCast(Result, I->getType());
    else {
      V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
                                    AllocaAlignment);
      Builder.CreateLifetimeEnd(AllocaResult, SizeVal64);
    }
    I->replaceAllUsesWith(V);
  }
  I->eraseFromParent();
  return true;
}
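// For reference, the C-level prototypes this routine targets. They follow the
// __atomic_* library ABI that clang/GCC lower to; listed here as hedged
// declarations, with iN standing for the N-byte integer type (N = 1, 2, 4,
// 8, 16 for the sized variants):
//
//   iN   __atomic_load_N(iN *ptr, int ordering);
//   void __atomic_store_N(iN *ptr, iN val, int ordering);
//   iN   __atomic_exchange_N(iN *ptr, iN val, int ordering);
//   iN   __atomic_fetch_add_N(iN *ptr, iN val, int ordering);   // etc.
//   bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
//                                    int success_order, int failure_order);
//
//   void __atomic_load(size_t size, void *ptr, void *ret, int ordering);
//   void __atomic_store(size_t size, void *ptr, void *val, int ordering);
//   void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
//                          int ordering);
//   bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
//                                  void *desired, int success_order,
//                                  int failure_order);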