#define DEBUG_TYPE "atomic-expand"
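// AtomicExpandImpl implements the pass logic: at the IR level it replaces
// atomic instructions with __atomic_* library calls, or expands them in terms
// of cmpxchg or load-linked/store-conditional loops, depending on what the
// target's TargetLowering asks for.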
class AtomicExpandImpl {
  // ...
  bool tryExpandAtomicLoad(LoadInst *LI);
  bool expandAtomicLoadToLL(LoadInst *LI);
  bool expandAtomicLoadToCmpXchg(LoadInst *LI);
  // ...
  void expandAtomicOpToLLSC(/* ... */);
  // ...
  void expandPartwordAtomicRMW(/* ... */);
  // ...
  static Value *insertRMWCmpXchgLoop(/* ... */);
  // ...
  void expandAtomicLoadToLibcall(LoadInst *LI);
  void expandAtomicStoreToLibcall(StoreInst *SI);
  // ...
};
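// ReplacementIRBuilder constructs replacement instructions in place of an
// atomic instruction being expanded: it folds with InstSimplify, inherits the
// function's strictfp state, and copies MMRA metadata from the original
// instruction onto everything it creates.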
struct ReplacementIRBuilder
    : IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> {
  MDNode *MMRAMD = nullptr;

  // ... in the constructor, taking the instruction I being replaced:
    if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
      this->setIsFPConstrained(true);

    MMRAMD = I->getMetadata(LLVMContext::MD_mmra);
  // ...

  void addMMRAMD(Instruction *I) {
    if (canInstructionHaveMMRAs(*I))
      I->setMetadata(LLVMContext::MD_mmra, MMRAMD);
  }
};
char AtomicExpandLegacy::ID = 0;
171 "Expand Atomic instructions",
false,
false)
static unsigned getAtomicOpSize(LoadInst *LI) {
  const DataLayout &DL = LI->getDataLayout();
  return DL.getTypeStoreSize(LI->getType());
}

static unsigned getAtomicOpSize(StoreInst *SI) {
  const DataLayout &DL = SI->getDataLayout();
  return DL.getTypeStoreSize(SI->getValueOperand()->getType());
}
template <typename Inst>
static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
  unsigned Size = getAtomicOpSize(I);
  Align Alignment = I->getAlign();
  return Alignment >= Size &&
         Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
}
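// processAtomicInstr is the per-instruction driver: it dispatches an atomic
// load, store, rmw, or cmpxchg to a libcall when its size/alignment is not
// supported, casts awkward operand types to integers when the target asks for
// it, brackets the operation with fences where required, and finally applies
// the target-selected expansion.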
bool AtomicExpandImpl::processAtomicInstr(Instruction *I) {
  auto *LI = dyn_cast<LoadInst>(I);
  auto *SI = dyn_cast<StoreInst>(I);
  auto *RMWI = dyn_cast<AtomicRMWInst>(I);
  auto *CASI = dyn_cast<AtomicCmpXchgInst>(I);

  bool MadeChange = false;
  // If the size/alignment is not supported, give up and call the runtime.
  if (LI) {
    if (!atomicSizeSupported(TLI, LI)) {
      expandAtomicLoadToLibcall(LI);
      return true;
    }
    if (TLI->shouldCastAtomicLoadInIR(LI) ==
        TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = LI = convertAtomicLoadToIntegerType(LI);
      MadeChange = true;
    }
  } else if (SI) {
    if (!atomicSizeSupported(TLI, SI)) {
      expandAtomicStoreToLibcall(SI);
      return true;
    }
    if (TLI->shouldCastAtomicStoreInIR(SI) ==
        TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = SI = convertAtomicStoreToIntegerType(SI);
      MadeChange = true;
    }
  } else if (RMWI) {
    if (!atomicSizeSupported(TLI, RMWI)) {
      expandAtomicRMWToLibcall(RMWI);
      return true;
    }
    if (TLI->shouldCastAtomicRMWIInIR(RMWI) ==
        TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = RMWI = convertAtomicXchgToIntegerType(RMWI);
      MadeChange = true;
    }
  } else if (CASI) {
    if (!atomicSizeSupported(TLI, CASI)) {
      expandAtomicCASToLibcall(CASI);
      return true;
    }
    if (CASI->getCompareOperand()->getType()->isPointerTy()) {
      I = CASI = convertCmpXchgToIntegerType(CASI);
      MadeChange = true;
    }
  }
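  // If the target wants explicit fences, don't strengthen the operation:
  // instead drop it to monotonic and bracket it with fences carrying the
  // original ordering. A cmpxchg only takes this path when it will not be
  // expanded further (AtomicExpansionKind::None), since LL/SC-based
  // expansions place their own fences.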
  if (TLI->shouldInsertFencesForAtomic(I)) {
    auto FenceOrdering = AtomicOrdering::Monotonic;
    if (LI && isAcquireOrStronger(LI->getOrdering())) {
      FenceOrdering = LI->getOrdering();
      LI->setOrdering(AtomicOrdering::Monotonic);
    } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
      FenceOrdering = SI->getOrdering();
      SI->setOrdering(AtomicOrdering::Monotonic);
    } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
                        isAcquireOrStronger(RMWI->getOrdering()))) {
      FenceOrdering = RMWI->getOrdering();
      RMWI->setOrdering(AtomicOrdering::Monotonic);
    } else if (CASI &&
               TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
                   TargetLoweringBase::AtomicExpansionKind::None &&
               (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
                isAcquireOrStronger(CASI->getSuccessOrdering()) ||
                isAcquireOrStronger(CASI->getFailureOrdering()))) {
      // If a compare and swap is lowered to LL/SC, we can do smarter fence
      // insertion, with a stronger one on the success path than on the
      // failure path. As a result, fence insertion is directly done by
      // expandAtomicCmpXchg in that case.
      FenceOrdering = CASI->getMergedOrdering();
      CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
      CASI->setFailureOrdering(AtomicOrdering::Monotonic);
    }

    if (FenceOrdering != AtomicOrdering::Monotonic) {
      MadeChange |= bracketInstWithFences(I, FenceOrdering);
    }
  } else if (I->hasAtomicStore() &&
             TLI->shouldInsertTrailingFenceForAtomicStore(I)) {
    auto FenceOrdering = AtomicOrdering::Monotonic;
    if (SI)
      FenceOrdering = SI->getOrdering();
    else if (RMWI)
      FenceOrdering = RMWI->getOrdering();
    else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) !=
                         TargetLoweringBase::AtomicExpansionKind::LLSC)
      // LLSC is handled in expandAtomicCmpXchg().
      FenceOrdering = CASI->getSuccessOrdering();

    ReplacementIRBuilder Builder(I, *DL);
    if (auto TrailingFence =
            TLI->emitTrailingFence(Builder, I, FenceOrdering)) {
      TrailingFence->moveAfter(I);
      MadeChange = true;
    }
  }
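  // All bracketing/casting is done; now apply the target-selected expansion
  // strategy to the (possibly rewritten) instruction.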
  if (LI)
    MadeChange |= tryExpandAtomicLoad(LI);
  else if (SI)
    MadeChange |= tryExpandAtomicStore(SI);
  else if (RMWI) {
    // An atomicrmw that leaves memory unchanged (e.g. `or %p, 0`) only needs
    // its read half; otherwise fall through to the generic expansion.
    if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
      MadeChange = true;
    } else {
      MadeChange |= tryExpandAtomicRMW(RMWI);
    }
  } else if (CASI)
    MadeChange |= tryExpandAtomicCmpXchg(CASI);

  return MadeChange;
}
bool AtomicExpandImpl::run(Function &F, const TargetMachine *TM) {
  const auto *Subtarget = TM->getSubtargetImpl(F);
  if (!Subtarget->enableAtomicExpand())
    return false;

  TLI = Subtarget->getTargetLowering();
  DL = &F.getDataLayout();

  bool MadeChange = false;

  SmallVector<Instruction *, 1> AtomicInsts;

  // Changing control-flow while iterating through it is a bad idea, so gather
  // a list of all atomic instructions before we start.
  for (Instruction &I : instructions(F))
    if (I.isAtomic() && !isa<FenceInst>(&I))
      AtomicInsts.push_back(&I);

  for (auto *I : AtomicInsts) {
    if (processAtomicInstr(I))
      MadeChange = true;
  }

  return MadeChange;
}
bool AtomicExpandLegacy::runOnFunction(Function &F) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;
  auto *TM = &TPC->getTM<TargetMachine>();
  AtomicExpandImpl AE;
  return AE.run(F, TM);
}

FunctionPass *llvm::createAtomicExpandLegacyPass() {
  return new AtomicExpandLegacy();
}

PreservedAnalyses AtomicExpandPass::run(Function &F,
                                        FunctionAnalysisManager &AM) {
  AtomicExpandImpl AE;

  bool Changed = AE.run(F, TM);
  if (!Changed)
    return PreservedAnalyses::all();
  return PreservedAnalyses::none();
}
bool AtomicExpandImpl::bracketInstWithFences(Instruction *I,
                                             AtomicOrdering Order) {
  ReplacementIRBuilder Builder(I, *DL);

  auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);

  auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
  // We have a guard here because not every atomic operation generates a
  // trailing fence.
  if (TrailingFence)
    TrailingFence->moveAfter(I);

  return (LeadingFence || TrailingFence);
}
/// Get the iX type with the same bitwidth as T.
IntegerType *
AtomicExpandImpl::getCorrespondingIntegerType(Type *T, const DataLayout &DL) {
  EVT VT = TLI->getMemValueType(DL, T);
  unsigned BitWidth = VT.getStoreSizeInBits();
  assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
  return IntegerType::get(T->getContext(), BitWidth);
}
/// Convert an atomic load of a non-integral type to an integer load of the
/// equivalent bitwidth.
LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) {
  auto *M = LI->getModule();
  Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());

  ReplacementIRBuilder Builder(LI, *DL);

  Value *Addr = LI->getPointerOperand();

  auto *NewLI = Builder.CreateLoad(NewTy, Addr);
  NewLI->setAlignment(LI->getAlign());
  NewLI->setVolatile(LI->isVolatile());
  NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
  LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");

  Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
  LI->replaceAllUsesWith(NewVal);
  LI->eraseFromParent();
  return NewLI;
}
AtomicRMWInst *
AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
  auto *M = RMWI->getModule();
  Type *NewTy =
      getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());

  ReplacementIRBuilder Builder(RMWI, *DL);

  Value *Addr = RMWI->getPointerOperand();
  Value *Val = RMWI->getValOperand();
  Value *NewVal = Val->getType()->isPointerTy()
                      ? Builder.CreatePtrToInt(Val, NewTy)
                      : Builder.CreateBitCast(Val, NewTy);

  auto *NewRMWI = Builder.CreateAtomicRMW(
      AtomicRMWInst::Xchg, Addr, NewVal, RMWI->getAlign(), RMWI->getOrdering(),
      RMWI->getSyncScopeID());
  NewRMWI->setVolatile(RMWI->isVolatile());
  LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");

  Value *NewRVal = RMWI->getType()->isPointerTy()
                       ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
                       : Builder.CreateBitCast(NewRMWI, RMWI->getType());
  RMWI->replaceAllUsesWith(NewRVal);
  RMWI->eraseFromParent();
  return NewRMWI;
}
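// tryExpandAtomicLoad translates the target's AtomicExpansionKind choice into
// a concrete rewrite: None leaves the instruction alone, LLSC and LLOnly use
// load-linked (with or without a balancing store-conditional), and CmpXChg
// replaces the load with a no-op compare-exchange that returns the old value.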
bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
  switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC:
    expandAtomicOpToLLSC(
        LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
        LI->getOrdering(),
        [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
    return true;
  case TargetLoweringBase::AtomicExpansionKind::LLOnly:
    return expandAtomicLoadToLL(LI);
  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
    return expandAtomicLoadToCmpXchg(LI);
  // ...
  }
}
bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
  switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::Expand:
    expandAtomicStore(SI);
    return true;
  // ...
  }
}
bool AtomicExpandImpl::expandAtomicLoadToLL(LoadInst *LI) {
  ReplacementIRBuilder Builder(LI, *DL);

  // On some architectures, load-linked instructions are atomic for larger
  // sizes than normal loads. For example, the only 64-bit load guaranteed
  // to be single-copy atomic by ARM is an ldrexd.
  Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
                                   LI->getPointerOperand(), LI->getOrdering());
  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);

  LI->replaceAllUsesWith(Val);
  LI->eraseFromParent();

  return true;
}
bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
  ReplacementIRBuilder Builder(LI, *DL);
  AtomicOrdering Order = LI->getOrdering();
  if (Order == AtomicOrdering::Unordered)
    Order = AtomicOrdering::Monotonic;

  Value *Addr = LI->getPointerOperand();
  Type *Ty = LI->getType();
  Constant *DummyVal = Constant::getNullValue(Ty);

  Value *Pair = Builder.CreateAtomicCmpXchg(
      Addr, DummyVal, DummyVal, LI->getAlign(), Order,
      AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
  Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");

  LI->replaceAllUsesWith(Loaded);
  LI->eraseFromParent();

  return true;
}
/// Convert an atomic store of a non-integral type to an integer store of the
/// equivalent bitwidth, so the backend only has to deal with integers.
StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
  ReplacementIRBuilder Builder(SI, *DL);
  auto *M = SI->getModule();
  Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
                                            M->getDataLayout());
  Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);

  Value *Addr = SI->getPointerOperand();

  StoreInst *NewSI = Builder.CreateStore(NewVal, Addr);
  NewSI->setAlignment(SI->getAlign());
  NewSI->setVolatile(SI->isVolatile());
  NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
  LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
  SI->eraseFromParent();
  return NewSI;
}
void AtomicExpandImpl::expandAtomicStore(StoreInst *SI) {
  // This function is only called on atomic stores the target cannot handle
  // natively, so rewrite the store as an atomic exchange whose result is
  // unused.
  ReplacementIRBuilder Builder(SI, *DL);
  AtomicOrdering Ordering = SI->getOrdering();
  assert(Ordering != AtomicOrdering::NotAtomic);
  AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
                                   ? AtomicOrdering::Monotonic
                                   : Ordering;
  AtomicRMWInst *AI = Builder.CreateAtomicRMW(
      AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
      SI->getAlign(), RMWOrdering);
  SI->eraseFromParent();

  // Now we have an appropriate swap instruction, lower it as usual.
  tryExpandAtomicRMW(AI);
}
static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
                                 Value *Loaded, Value *NewVal, Align AddrAlign,
                                 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
                                 Value *&Success, Value *&NewLoaded) {
  // ...
  Value *Pair = Builder.CreateAtomicCmpXchg(
      Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
      AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
  Success = Builder.CreateExtractValue(Pair, 1, "success");
  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
}
bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
  switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
    unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
    unsigned ValueSize = getAtomicOpSize(AI);
    if (ValueSize < MinCASSize) {
      expandPartwordAtomicRMW(AI,
                              TargetLoweringBase::AtomicExpansionKind::LLSC);
    } else {
      // ...
    }
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
    unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
    unsigned ValueSize = getAtomicOpSize(AI);
    if (ValueSize < MinCASSize) {
      expandPartwordAtomicRMW(AI,
                              TargetLoweringBase::AtomicExpansionKind::CmpXChg);
    } else {
      // ... expand to a cmpxchg loop and emit an optimization remark:
      //       "A compare and swap loop was generated for an atomic ..."
      //       "... operation at " << MemScope << " memory scope"
    }
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
    unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
    unsigned ValueSize = getAtomicOpSize(AI);
    if (ValueSize < MinCASSize) {
      // Widen And/Or/Xor and give the target another chance at the
      // full-word form.
      AtomicRMWInst::BinOp Op = AI->getOperation();
      if (Op == AtomicRMWInst::And || Op == AtomicRMWInst::Or ||
          Op == AtomicRMWInst::Xor) {
        tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
        return true;
      }
    }
    expandAtomicRMWToMaskedIntrinsic(AI);
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic:
    TLI->emitBitTestAtomicRMWIntrinsic(AI);
    return true;
  case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic:
    TLI->emitCmpArithAtomicRMWIntrinsic(AI);
    return true;
  case TargetLoweringBase::AtomicExpansionKind::Expand:
    TLI->emitExpandAtomicRMW(AI);
    return true;
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
  }
}
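// Everything below implements "partword" atomics: an atomic narrower than the
// smallest cmpxchg/LL-SC width the target supports (e.g. an i8 atomicrmw on a
// machine with only 32-bit cmpxchg) is emulated by operating on the aligned
// containing word and masking/shifting the affected bytes.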
struct PartwordMaskValues {
  // These three fields are guaranteed to be set by createMaskInstrs.
  Type *WordType = nullptr;
  Type *ValueType = nullptr;
  Type *IntValueType = nullptr;
  Value *AlignedAddr = nullptr;
  Align AlignedAddrAlignment;
  // The remaining fields can be null.
  Value *ShiftAmt = nullptr;
  Value *Mask = nullptr;
  Value *Inv_Mask = nullptr;
};
LLVM_ATTRIBUTE_UNUSED
raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
  auto PrintObj = [&O](auto *V) {
    if (V)
      O << *V;
    else
      O << "nullptr";
    O << '\n';
  };
  O << "PartwordMaskValues {\n";
  O << "  WordType: ";
  PrintObj(PMV.WordType);
  O << "  ValueType: ";
  PrintObj(PMV.ValueType);
  O << "  AlignedAddr: ";
  PrintObj(PMV.AlignedAddr);
  O << "  AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
  O << "  ShiftAmt: ";
  PrintObj(PMV.ShiftAmt);
  O << "  Mask: ";
  PrintObj(PMV.Mask);
  O << "  Inv_Mask: ";
  PrintObj(PMV.Inv_Mask);
  O << "}\n";
  return O;
}
/// This is a helper function which builds instructions to provide values
/// necessary for partword atomic operations.
static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
                                           Instruction *I, Type *ValueType,
                                           Value *Addr, Align AddrAlign,
                                           unsigned MinWordSize) {
  PartwordMaskValues PMV;

  Module *M = I->getModule();
  LLVMContext &Ctx = M->getContext();
  const DataLayout &DL = M->getDataLayout();
  unsigned ValueSize = DL.getTypeStoreSize(ValueType);

  PMV.ValueType = PMV.IntValueType = ValueType;
  if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy())
    PMV.IntValueType =
        Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());

  PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
                                         : ValueType;
  if (PMV.ValueType == PMV.WordType) {
    PMV.AlignedAddr = Addr;
    PMV.AlignedAddrAlignment = AddrAlign;
    PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
    PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
    return PMV;
  }

  PMV.AlignedAddrAlignment = Align(MinWordSize);

  assert(ValueSize < MinWordSize);

  PointerType *PtrTy = cast<PointerType>(Addr->getType());
  IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
  Value *PtrLSB;

  if (AddrAlign < MinWordSize) {
    PMV.AlignedAddr = Builder.CreateIntrinsic(
        Intrinsic::ptrmask, {PtrTy, IntTy},
        {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
        "AlignedAddr");

    Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
    PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
  } else {
    // The address is already word-aligned.
    PMV.AlignedAddr = Addr;
    PtrLSB = ConstantInt::getNullValue(IntTy);
  }

  if (DL.isLittleEndian()) {
    // turn bytes into bits
    PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
  } else {
    // turn bytes into bits, and count from the other side.
    PMV.ShiftAmt = Builder.CreateShl(
        Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
  }

  PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
  PMV.Mask = Builder.CreateShl(
      ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
      "Mask");

  PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
  return PMV;
}
static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
                                 const PartwordMaskValues &PMV) {
  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
  if (PMV.WordType == PMV.ValueType)
    return WideWord;

  Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
  Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
  return Builder.CreateBitOrPointerCast(Trunc, PMV.ValueType);
}
static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
                                Value *Updated,
                                const PartwordMaskValues &PMV) {
  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
  assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
  if (PMV.WordType == PMV.ValueType)
    return Updated;

  Value *Cast = Builder.CreateBitOrPointerCast(Updated, PMV.IntValueType);
  Value *ZExt = Builder.CreateZExt(Cast, PMV.WordType, "extended");
  Value *Shift =
      Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
  Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
  return Builder.CreateOr(And, Shift, "inserted");
}
/// Emit IR to implement a masked version of a given atomicrmw operation.
static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
                                    IRBuilderBase &Builder, Value *Loaded,
                                    Value *Shifted_Inc, Value *Inc,
                                    const PartwordMaskValues &PMV) {
  switch (Op) {
  // ... ops whose result only touches the relevant bits (e.g. Xchg):
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
  // ... ops computed on the extracted value and re-inserted (e.g. Max/Min):
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
  // ...
  }
}
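// expandPartwordAtomicRMW rewrites a sub-word atomicrmw as a loop over the
// containing word: the value operand is zero-extended and shifted into
// position once, and each iteration applies performMaskedAtomicOp to the
// loaded word before attempting the cmpxchg or store-conditional.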
void AtomicExpandImpl::expandPartwordAtomicRMW(
    AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
  // Widenable bitwise ops are handled by widening rather than masking.
  AtomicRMWInst::BinOp Op = AI->getOperation();
  if (Op == AtomicRMWInst::And || Op == AtomicRMWInst::Or ||
      Op == AtomicRMWInst::Xor) {
    tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
    return;
  }

  AtomicOrdering MemOpOrder = AI->getOrdering();
  SyncScope::ID SSID = AI->getSyncScopeID();

  ReplacementIRBuilder Builder(AI, *DL);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  Value *ValOperand_Shifted = nullptr;
  // ... for ops that act on the pre-shifted operand:
  ValOperand_Shifted =
      Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
                        PMV.ShiftAmt, "ValOperand_Shifted");
  // ...
  Value *OldResult;
  if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
    OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
                                     PMV.AlignedAddrAlignment, MemOpOrder, SSID,
                                     PerformPartwordOp, createCmpXchgInstFun);
  } else {
    assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
    OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
                                  PMV.AlignedAddrAlignment, MemOpOrder,
                                  PerformPartwordOp);
  }

  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
}
/// Copy metadata that's safe to preserve when widening atomics.
static void copyMetadataForAtomic(Instruction &Dest,
                                  const Instruction &Source) {
  SmallVector<std::pair<unsigned, MDNode *>, 8> MD;
  Source.getAllMetadata(MD);
  LLVMContext &Ctx = Dest.getContext();

  for (auto [ID, N] : MD) {
    switch (ID) {
    case LLVMContext::MD_dbg:
    case LLVMContext::MD_tbaa:
    case LLVMContext::MD_tbaa_struct:
    case LLVMContext::MD_alias_scope:
    case LLVMContext::MD_noalias:
    case LLVMContext::MD_access_group:
    case LLVMContext::MD_mmra:
      Dest.setMetadata(ID, N);
      break;
    default:
      // Target-dependent kinds are matched by name, e.g.:
      if (ID == Ctx.getMDKindID("amdgpu.no.fine.grained.memory"))
        Dest.setMetadata(ID, N);
      break;
    }
  }
}
AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
  ReplacementIRBuilder Builder(AI, *DL);
  AtomicRMWInst::BinOp Op = AI->getOperation();

  assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
          Op == AtomicRMWInst::And) &&
         "Unable to widen operation");

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  Value *ValOperand_Shifted =
      Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
                        PMV.ShiftAmt, "ValOperand_Shifted");

  Value *NewOperand;

  if (Op == AtomicRMWInst::And)
    NewOperand =
        Builder.CreateOr(ValOperand_Shifted, PMV.Inv_Mask, "AndOperand");
  else
    NewOperand = ValOperand_Shifted;

  AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
      Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
      AI->getOrdering(), AI->getSyncScopeID());

  copyMetadataForAtomic(*NewAI, *AI);

  Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
  return NewAI;
}
bool AtomicExpandImpl::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
  // ...
  Value *Addr = CI->getPointerOperand();
  Value *Cmp = CI->getCompareOperand();
  Value *NewVal = CI->getNewValOperand();

  BasicBlock *BB = CI->getParent();
  // ...
  ReplacementIRBuilder Builder(CI, *DL);
  // ...
  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place); remove it before building the loop blocks.
  std::prev(BB->end())->eraseFromParent();
  // ...
  PartwordMaskValues PMV =
      createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
                       CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // Shift the incoming compare and new values into position in the word.
  Value *NewVal_Shifted =
      Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
  Value *Cmp_Shifted =
      Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
  // ...
  Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
  // ...
  PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
  Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);

  // Merge the other bytes of the word back in before the full-word cmpxchg.
  Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
  Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
  AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
      PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
      CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID());
  // ...
  // On failure, retry with the freshly observed word: other bytes may have
  // changed even though the addressed byte compared equal.
  Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
  // ...
}
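// The next three helpers lower full-word operations once the partword
// bookkeeping is done: expandAtomicOpToLLSC builds an LL/SC retry loop, while
// the *ToMaskedIntrinsic variants hand the shifted operand, mask, and shift
// amount straight to a target-provided intrinsic.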
void AtomicExpandImpl::expandAtomicOpToLLSC(
    Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
  ReplacementIRBuilder Builder(I, *DL);
  Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
                                    MemOpOrder, PerformOp);

  I->replaceAllUsesWith(Loaded);
  I->eraseFromParent();
}
void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
  ReplacementIRBuilder Builder(AI, *DL);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // The value operand must be sign-extended for signed min/max so that the
  // target's signed comparison instructions can be used; otherwise zero-ext.
  Instruction::CastOps CastOp = Instruction::ZExt;
  if (AI->getOperation() == AtomicRMWInst::Min ||
      AI->getOperation() == AtomicRMWInst::Max)
    CastOp = Instruction::SExt;

  Value *ValOperand_Shifted = Builder.CreateShl(
      Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
      PMV.ShiftAmt, "ValOperand_Shifted");
  Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
      Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
      AI->getOrdering());

  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
}
void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic(
    AtomicCmpXchgInst *CI) {
  ReplacementIRBuilder Builder(CI, *DL);

  PartwordMaskValues PMV = createMaskInstrs(
      Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
      CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
  // ... zero-extend and shift the compare and new values, then:
  Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
      Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
      CI->getMergedOrdering());
  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
  Value *Res = PoisonValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
  Value *Success = Builder.CreateICmpEQ(
      CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
  Res = Builder.CreateInsertValue(Res, Success, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
}
Value *AtomicExpandImpl::insertRMWLLSCLoop(
    IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  assert(AddrAlign >= F->getDataLayout().getTypeStoreSize(ResultTy) &&
         "Expected at least natural alignment at this point.");

  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above added a terminator to BB in the wrong place; remove
  // it and branch into the loop instead.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);

  Value *NewVal = PerformOp(Builder, Loaded);

  Value *StoreSuccess =
      TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
  Value *TryAgain = Builder.CreateICmpNE(
      StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return Loaded;
}
1278 M->getDataLayout());
1280 ReplacementIRBuilder Builder(CI, *
DL);
1292 LLVM_DEBUG(
dbgs() <<
"Replaced " << *CI <<
" with " << *NewCI <<
"\n");
bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
  AtomicOrdering FailureOrder = CI->getFailureOrdering();
  Value *Addr = CI->getPointerOperand();
  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();

  bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
  AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
                                  ? AtomicOrdering::Monotonic
                                  : CI->getMergedOrdering();

  // A strong cmpxchg whose failure ordering differs re-loads with release
  // semantics before retrying.
  bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
                           SuccessOrder != FailureOrder;

  // For minsize, use one unconditional release barrier instead of two
  // conditional ones; this is only correct for a strong cmpxchg.
  bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();

  // Create all the blocks of the CFG up front.
  BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
  auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
  auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB);
  auto ReleasedLoadBB =
      BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, TryStoreBB);
  auto ReleasingStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, ReleasedLoadBB);
  auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);

  ReplacementIRBuilder Builder(CI, *DL);

  // The split call above added a branch at the end of BB (to the wrong
  // place); remove it and branch into the expansion instead.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
                       CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
  Builder.CreateBr(StartBB);

  // start: load-linked the current value and decide whether to store.
  Builder.SetInsertPoint(StartBB);
  Value *UnreleasedLoad =
      TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
  Value *UnreleasedLoadExtract =
      extractMaskedValue(Builder, UnreleasedLoad, PMV);
  Value *ShouldStore = Builder.CreateICmpEQ(
      UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");
  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);

  Builder.SetInsertPoint(ReleasingStoreBB);
  if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(TryStoreBB);

  Builder.SetInsertPoint(TryStoreBB);
  PHINode *LoadedTryStore =
      Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
  LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
  Value *NewValueInsert =
      insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
  Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
                                                  PMV.AlignedAddr, MemOpOrder);
  // ...
  BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
  Builder.CreateCondBr(StoreSuccess, SuccessBB,
                       CI->isWeak() ? FailureBB : RetryBB);

  Builder.SetInsertPoint(ReleasedLoadBB);
  Value *SecondLoad;
  if (HasReleasedLoadBB) {
    SecondLoad =
        TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
    // ... compare the extracted value again, then:
    Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
    LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
  } else
    Builder.CreateUnreachable();

  Builder.SetInsertPoint(SuccessBB);
  if (ShouldInsertFencesForAtomic ||
      TLI->shouldInsertTrailingFenceForAtomicStore(CI))
    TLI->emitTrailingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(ExitBB);

  Builder.SetInsertPoint(NoStoreBB);
  PHINode *LoadedNoStore =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
  LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
  if (HasReleasedLoadBB)
    LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);

  // In the failing case the target may want to balance out the load-linked
  // with a dedicated instruction (e.g. clearing ARM's exclusive monitor).
  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
  Builder.CreateBr(FailureBB);

  Builder.SetInsertPoint(FailureBB);
  PHINode *LoadedFailure =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
  LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
  if (CI->isWeak())
    LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
  if (ShouldInsertFencesForAtomic)
    TLI->emitTrailingFence(Builder, CI, FailureOrder);
  Builder.CreateBr(ExitBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  PHINode *LoadedExit =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
  LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
  LoadedExit->addIncoming(LoadedFailure, FailureBB);
  // ...
  Value *LoadedFull = LoadedExit;
  // ...
  // Users that merely extract from the { iN, i1 } pair are rewired to the
  // PHIs above and pruned:
  for (auto *User : CI->users()) {
    // ...
    assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
           "weird extraction from { iN, i1 }");
    // ...
  }
  // ...
  for (auto *EV : PrunedInsts)
    EV->eraseFromParent();
  // ...
  return true;
}
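// An atomicrmw is "idempotent" when applying it leaves memory unchanged,
// e.g. `atomicrmw add %p, 0` or `atomicrmw and %p, -1`. Such operations only
// need the read half, so they can become fenced loads.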
bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
  auto *C = dyn_cast<ConstantInt>(RMWI->getValOperand());
  if (!C)
    return false;
  switch (RMWI->getOperation()) {
  // ...
  case AtomicRMWInst::And:
    // x & -1 == x
    return C->isMinusOne();
  // ...
  default:
    return false;
  }
}
bool AtomicExpandImpl::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
  if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
    tryExpandAtomicLoad(ResultingLoad);
    return true;
  }
  return false;
}
Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
    IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder, SyncScope::ID SSID,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
    CreateCmpXchgInstFun CreateCmpXchg) {
  // ...
  // Remove the branch that splitBasicBlock added before wiring up the loop.
  std::prev(BB->end())->eraseFromParent();
  // ...
  Value *NewVal = PerformOp(Builder, Loaded);

  Value *NewLoaded = nullptr;
  Value *Success = nullptr;

  CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign, MemOpOrder, SSID,
                Success, NewLoaded);
  // ...
}
bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
  unsigned ValueSize = getAtomicOpSize(CI);

  switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    if (ValueSize < MinCASSize)
      return expandPartwordCmpXchg(CI);
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC:
    return expandAtomicCmpXchg(CI);
  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
    expandAtomicCmpXchgToMaskedIntrinsic(CI);
    return true;
  // ...
  }
}
bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
                                    CreateCmpXchgInstFun CreateCmpXchg) {
  ReplacementIRBuilder Builder(AI, AI->getDataLayout());
  Builder.setIsFPConstrained(
      AI->getFunction()->hasFnAttribute(Attribute::StrictFP));

  Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
      Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
      AI->getOrdering(), AI->getSyncScopeID(),
      [&](IRBuilderBase &Builder, Value *Loaded) {
        return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
                                   AI->getValOperand());
      },
      CreateCmpXchg);

  AI->replaceAllUsesWith(Loaded);
  AI->eraseFromParent();
  return true;
}
static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
                                  const DataLayout &DL) {
  // "LargestSize" approximates the largest integer a libcall can take:
  // int128 on 64-bit platforms, otherwise 64 bits.
  unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
  return Alignment >= Size &&
         (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
         Size <= LargestSize;
}
void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_LOAD,   RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
      RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
  unsigned Size = getAtomicOpSize(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
      I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
  if (!expanded)
    report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load");
}
void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_STORE,   RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
      RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
  unsigned Size = getAtomicOpSize(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
      nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
  if (!expanded)
    report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store");
}
void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_COMPARE_EXCHANGE,   RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
      RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
      RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
  unsigned Size = getAtomicOpSize(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
      I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
      Libcalls);
  if (!expanded)
    report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS");
}
static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
  static const RTLIB::Libcall LibcallsXchg[6] = {
      RTLIB::ATOMIC_EXCHANGE,   RTLIB::ATOMIC_EXCHANGE_1,
      RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
      RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
  // The fetch-op libcalls have no generic (size_t) variant, hence
  // UNKNOWN_LIBCALL in slot 0.
  static const RTLIB::Libcall LibcallsAdd[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_ADD_1,
      RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
      RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
  static const RTLIB::Libcall LibcallsSub[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_SUB_1,
      RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
      RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
  static const RTLIB::Libcall LibcallsAnd[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_AND_1,
      RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
      RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
  static const RTLIB::Libcall LibcallsOr[6] = {
      RTLIB::UNKNOWN_LIBCALL,   RTLIB::ATOMIC_FETCH_OR_1,
      RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
      RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
  static const RTLIB::Libcall LibcallsXor[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_XOR_1,
      RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
      RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
  static const RTLIB::Libcall LibcallsNand[6] = {
      RTLIB::UNKNOWN_LIBCALL,     RTLIB::ATOMIC_FETCH_NAND_1,
      RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
      RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
  // ...
}
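// Min/Max-style and floating-point operations have no __atomic_fetch_*
// libcalls at all, so GetRMWLibcall returns an empty list for them and
// expandAtomicRMWToLibcall falls back to a cmpxchg loop whose cmpxchg is
// itself lowered to __atomic_compare_exchange.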
void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
  ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
  unsigned Size = getAtomicOpSize(I);

  bool Success = false;
  if (!Libcalls.empty())
    Success = expandAtomicOpToLibcall(
        I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
        nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);

  // The expansion failed: either there were no libcalls at all for the
  // operation (min/max), or only size-specialized ones and none fit. Expand
  // to a CAS loop whose cmpxchg becomes a libcall instead.
  if (!Success) {
    expandAtomicRMWToCmpXchg(
        I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
                  Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
                  SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) {
          // Create the CAS instruction normally, then convert it to a
          // libcall.
          AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
              Addr, Loaded, NewVal, Alignment, MemOpOrder,
              AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder),
              SSID);
          Success = Builder.CreateExtractValue(Pair, 1, "success");
          NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");

          expandAtomicCASToLibcall(Pair);
        });
  }
}
bool AtomicExpandImpl::expandAtomicOpToLibcall(
    Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
    Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
    AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
  assert(Libcalls.size() == 6);
  // ...
  IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
  // ...
  const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);
  // ...
  const bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);

  RTLIB::Libcall RTLibType;
  if (UseSizedLibcall) {
    switch (Size) {
    case 1:
      RTLibType = Libcalls[1];
      break;
    case 2:
      RTLibType = Libcalls[2];
      break;
    case 4:
      RTLibType = Libcalls[3];
      break;
    case 8:
      RTLibType = Libcalls[4];
      break;
    case 16:
      RTLibType = Libcalls[5];
      break;
    }
  } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
    RTLibType = Libcalls[0];
  } else {
    // Can't use a sized function, and there's no generic for this operation,
    // so give up.
    return false;
  }

  if (!TLI->getLibcallName(RTLibType)) {
    // This target does not implement the requested atomic libcall; give up.
    return false;
  }

  // Build up the argument list.
  // ...
  // 'size' argument (generic calls only).
  if (!UseSizedLibcall) {
    // Note, getIntPtrType is assumed equal to size_t.
    Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
  }

  // 'ptr' argument.
  Value *PtrVal = PointerOperand;
  // ...
  Args.push_back(PtrVal);

  // 'expected' argument, if present: passed indirectly through an alloca.
  if (CASExpected) {
    AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
    // ...
    Args.push_back(AllocaCASExpected);
  }

  // 'val' argument ('desired' for cas), if present.
  if (ValueOperand) {
    if (UseSizedLibcall) {
      Value *IntValue =
          Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
      Args.push_back(IntValue);
    } else {
      AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
      // ...
      Args.push_back(AllocaValue);
    }
  }

  // 'ret' argument, only for generic calls that return through memory.
  if (!CASExpected && HasResult && !UseSizedLibcall) {
    AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
    // ...
    Args.push_back(AllocaResult);
  }

  // 'ordering' ('success_order' for cas) argument.
  Args.push_back(OrderingVal);

  // 'failure_order' argument, if present.
  if (Ordering2Val)
    Args.push_back(Ordering2Val);

  // The return type: i1 for cas, the sized integer for sized calls with a
  // result, and void otherwise.
  Type *ResultTy;
  if (CASExpected) {
    ResultTy = Type::getInt1Ty(Ctx);
    // ...
  } else if (HasResult && UseSizedLibcall)
    ResultTy = SizedIntTy;
  else
    ResultTy = Type::getVoidTy(Ctx);

  SmallVector<Type *, 6> ArgTys;
  for (Value *Arg : Args)
    ArgTys.push_back(Arg->getType());
  FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
  FunctionCallee LibcallFn =
      M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
  CallInst *Call = Builder.CreateCall(LibcallFn, Args);
  Call->setAttributes(Attr);
  Value *Result = Call;

  // And then, extract the results...
  if (ValueOperand && !UseSizedLibcall)
    Builder.CreateLifetimeEnd(AllocaValue, SizeVal64);

  if (CASExpected) {
    // The final result from the CAS is { load of 'expected' alloca, bool
    // result from call }.
    Type *FinalResultTy = I->getType();
    Value *V = PoisonValue::get(FinalResultTy);
    Value *ExpectedOut = Builder.CreateAlignedLoad(
        CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
    // ...
    I->replaceAllUsesWith(V);
  } else if (HasResult) {
    Value *V;
    if (UseSizedLibcall)
      V = Builder.CreateBitOrPointerCast(Result, I->getType());
    else {
      V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
                                    AllocaAlignment);
      // ...
    }
    I->replaceAllUsesWith(V);
  }
  I->eraseFromParent();
  return true;
}