#define DEBUG_TYPE "atomic-expand"
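// Method declarations from the AtomicExpand FunctionPass (the surrounding
// class definition is elided in this excerpt). Each atomic instruction is
// either rewritten as an equivalent integer-typed operation, expanded into IR
// (an LL/SC or cmpxchg loop), or lowered to a __atomic_* libcall, depending
// on what the target's TargetLowering hooks request.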
bool tryExpandAtomicLoad(LoadInst *LI);
bool expandAtomicLoadToLL(LoadInst *LI);
bool expandAtomicLoadToCmpXchg(LoadInst *LI);
void expandAtomicOpToLLSC(
    Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
void expandPartwordAtomicRMW(
    AtomicRMWInst *I, TargetLoweringBase::AtomicExpansionKind ExpansionKind);
static Value *insertRMWCmpXchgLoop(
    IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder, SyncScope::ID SSID,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
    CreateCmpXchgInstFun CreateCmpXchg);
void expandAtomicLoadToLibcall(LoadInst *LI);
void expandAtomicStoreToLibcall(StoreInst *SI);

struct ReplacementIRBuilder : IRBuilder<InstSimplifyFolder> {
char AtomicExpand::ID = 0;
static unsigned getAtomicOpSize(LoadInst *LI) {
  const DataLayout &DL = LI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(LI->getType());
}

static unsigned getAtomicOpSize(StoreInst *SI) {
  const DataLayout &DL = SI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(SI->getValueOperand()->getType());
}

template <typename Inst>
static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
  unsigned Size = getAtomicOpSize(I);
  Align Alignment = I->getAlign();
  return Alignment >= Size &&
         Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
}
bool AtomicExpand::runOnFunction(Function &F) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  auto &TM = TPC->getTM<TargetMachine>();
  const auto *Subtarget = TM.getSubtargetImpl(F);
  if (!Subtarget->enableAtomicExpand())
    return false;
  TLI = Subtarget->getTargetLowering();
  DL = &F.getParent()->getDataLayout();

  SmallVector<Instruction *, 1> AtomicInsts;
  for (Instruction &I : instructions(F))
    if (I.isAtomic() && !isa<FenceInst>(&I))
      AtomicInsts.push_back(&I);

  bool MadeChange = false;
  for (auto *I : AtomicInsts) {
    auto LI = dyn_cast<LoadInst>(I);
    auto SI = dyn_cast<StoreInst>(I);
    auto RMWI = dyn_cast<AtomicRMWInst>(I);
    auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
    assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");
    expandAtomicLoadToLibcall(LI);
    expandAtomicStoreToLibcall(SI);
    expandAtomicRMWToLibcall(RMWI);
    expandAtomicCASToLibcall(CASI);
    if (LI && TLI->shouldCastAtomicLoadInIR(LI) ==
                  TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = LI = convertAtomicLoadToIntegerType(LI);
      MadeChange = true;
    } else if (SI &&
               TLI->shouldCastAtomicStoreInIR(SI) ==
                   TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = SI = convertAtomicStoreToIntegerType(SI);
      MadeChange = true;
    } else if (RMWI &&
               TLI->shouldCastAtomicRMWIInIR(RMWI) ==
                   TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = RMWI = convertAtomicXchgToIntegerType(RMWI);
      MadeChange = true;
    } else if (CASI) {
      if (CASI->getCompareOperand()->getType()->isPointerTy()) {
        I = CASI = convertCmpXchgToIntegerType(CASI);
        MadeChange = true;
      }
    }
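    // Targets that want explicit fences (e.g. classic ARM) have the
    // instruction downgraded to monotonic here, with the original ordering
    // re-established by fences bracketing the instruction.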
    if (TLI->shouldInsertFencesForAtomic(I)) {
      auto FenceOrdering = AtomicOrdering::Monotonic;
      if (LI && isAcquireOrStronger(LI->getOrdering())) {
        FenceOrdering = LI->getOrdering();
        LI->setOrdering(AtomicOrdering::Monotonic);
      } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
        FenceOrdering = SI->getOrdering();
        SI->setOrdering(AtomicOrdering::Monotonic);
      } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
                          isAcquireOrStronger(RMWI->getOrdering()))) {
        FenceOrdering = RMWI->getOrdering();
        RMWI->setOrdering(AtomicOrdering::Monotonic);
      } else if (CASI &&
                 TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
                     TargetLoweringBase::AtomicExpansionKind::None &&
                 (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
                  isAcquireOrStronger(CASI->getSuccessOrdering()) ||
                  isAcquireOrStronger(CASI->getFailureOrdering()))) {
        // A cmpxchg expanded in IR does its own, smarter fence insertion,
        // with a stronger fence on the success path than on failure.
        FenceOrdering = CASI->getMergedOrdering();
        CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
        CASI->setFailureOrdering(AtomicOrdering::Monotonic);
      }

      if (FenceOrdering != AtomicOrdering::Monotonic) {
        MadeChange |= bracketInstWithFences(I, FenceOrdering);
      }
    } else if (I->hasAtomicStore() &&
               TLI->shouldInsertTrailingFenceForAtomicStore(I)) {
      auto FenceOrdering = AtomicOrdering::Monotonic;
      if (SI)
        FenceOrdering = SI->getOrdering();
      else if (RMWI)
        FenceOrdering = RMWI->getOrdering();
      else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) !=
                           TargetLoweringBase::AtomicExpansionKind::LLSC)
        // LLSC is handled in expandAtomicCmpXchg().
        FenceOrdering = CASI->getSuccessOrdering();

      IRBuilder Builder(I);
      if (auto TrailingFence =
              TLI->emitTrailingFence(Builder, I, FenceOrdering)) {
        TrailingFence->moveAfter(I);
        MadeChange = true;
      }
    }
    if (LI)
      MadeChange |= tryExpandAtomicLoad(LI);
    else if (SI)
      MadeChange |= tryExpandAtomicStore(SI);
    else if (RMWI) {
      if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
        MadeChange = true;
      } else {
        AtomicRMWInst::BinOp Op = RMWI->getOperation();
        unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
        unsigned ValueSize = getAtomicOpSize(RMWI);
        if (ValueSize < MinCASSize &&
            (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
             Op == AtomicRMWInst::And)) {
          RMWI = widenPartwordAtomicRMW(RMWI);
          MadeChange = true;
        }
        MadeChange |= tryExpandAtomicRMW(RMWI);
      }
    } else if (CASI)
      MadeChange |= tryExpandAtomicCmpXchg(CASI);
  }
  return MadeChange;
}
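// Surround a (now monotonic) atomic instruction with the target's leading and
// trailing fences.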
bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) {
  ReplacementIRBuilder Builder(I, *DL);

  auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
  auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
  // Not every atomic operation generates a trailing fence.
  if (TrailingFence)
    TrailingFence->moveAfter(I);

  return (LeadingFence || TrailingFence);
}
IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
                                                       const DataLayout &DL) {
  EVT VT = TLI->getMemValueType(DL, T);
LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
  auto *M = LI->getModule();
  Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());

  ReplacementIRBuilder Builder(LI, *DL);

  Value *Addr = LI->getPointerOperand();

  auto *NewLI = Builder.CreateLoad(NewTy, Addr);
  NewLI->setAlignment(LI->getAlign());
  NewLI->setVolatile(LI->isVolatile());
  NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
  LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");

  Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
  LI->replaceAllUsesWith(NewVal);
  LI->eraseFromParent();
  return NewLI;
}
AtomicRMWInst *
AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
  auto *M = RMWI->getModule();
  Type *NewTy =
      getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());

  ReplacementIRBuilder Builder(RMWI, *DL);

  Value *Addr = RMWI->getPointerOperand();
  Value *Val = RMWI->getValOperand();
  Value *NewVal = Val->getType()->isPointerTy()
                      ? Builder.CreatePtrToInt(Val, NewTy)
                      : Builder.CreateBitCast(Val, NewTy);

  auto *NewRMWI = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Addr, NewVal,
                                          RMWI->getAlign(), RMWI->getOrdering(),
                                          RMWI->getSyncScopeID());
  NewRMWI->setVolatile(RMWI->isVolatile());
  LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");

  Value *NewRVal = RMWI->getType()->isPointerTy()
                       ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
                       : Builder.CreateBitCast(NewRMWI, RMWI->getType());
  RMWI->replaceAllUsesWith(NewRVal);
  RMWI->eraseFromParent();
  return NewRMWI;
}
bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
  switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC:
    expandAtomicOpToLLSC(
        LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
        LI->getOrdering(),
        [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
    return true;
  case TargetLoweringBase::AtomicExpansionKind::LLOnly:
    return expandAtomicLoadToLL(LI);
  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
    return expandAtomicLoadToCmpXchg(LI);
  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
    LI->setAtomic(AtomicOrdering::NotAtomic);
    return true;
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
  }
}
bool AtomicExpand::tryExpandAtomicStore(StoreInst *SI) {
  switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::Expand:
    expandAtomicStore(SI);
    return true;
  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
    SI->setAtomic(AtomicOrdering::NotAtomic);
    return true;
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicStore");
  }
}
bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
  ReplacementIRBuilder Builder(LI, *DL);

  // On some architectures load-linked is atomic for larger sizes than a
  // normal load (e.g. a 64-bit ldrexd on 32-bit ARM).
  Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
                                   LI->getPointerOperand(), LI->getOrdering());
  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);

  LI->replaceAllUsesWith(Val);
  LI->eraseFromParent();
  return true;
}
bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
  ReplacementIRBuilder Builder(LI, *DL);
  AtomicOrdering Order = LI->getOrdering();
  if (Order == AtomicOrdering::Unordered)
    Order = AtomicOrdering::Monotonic;

  Value *Addr = LI->getPointerOperand();
  Type *Ty = LI->getType();
  Constant *DummyVal = Constant::getNullValue(Ty);

  // A cmpxchg always returns the current value, whether or not it stores.
  Value *Pair = Builder.CreateAtomicCmpXchg(
      Addr, DummyVal, DummyVal, LI->getAlign(), Order,
      AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
  Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");

  LI->replaceAllUsesWith(Loaded);
  LI->eraseFromParent();
  return true;
}
StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
  ReplacementIRBuilder Builder(SI, *DL);
  auto *M = SI->getModule();
  Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
                                            M->getDataLayout());
  Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);

  Value *Addr = SI->getPointerOperand();
  StoreInst *NewSI = Builder.CreateStore(NewVal, Addr);
  NewSI->setAlignment(SI->getAlign());
  NewSI->setVolatile(SI->isVolatile());
  NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
  LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
  SI->eraseFromParent();
  return NewSI;
}
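// An atomic store that is too wide for a native store is replaced by an
// atomic xchg whose result is unused; the swap is then lowered like any other
// atomicrmw (e.g. via ldrexd/strexd on ARM, which is atomic at 64 bits where
// a plain str is not).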
void AtomicExpand::expandAtomicStore(StoreInst *SI) {
  ReplacementIRBuilder Builder(SI, *DL);
  AtomicOrdering Ordering = SI->getOrdering();
  assert(Ordering != AtomicOrdering::NotAtomic);
  AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
                                   ? AtomicOrdering::Monotonic
                                   : Ordering;
  AtomicRMWInst *AI = Builder.CreateAtomicRMW(
      AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
      SI->getAlign(), RMWOrdering);
  SI->eraseFromParent();

  // Now that we have an appropriate swap instruction, lower it as usual.
  tryExpandAtomicRMW(AI);
}
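// createCmpXchgInstFun: emits the IR-level cmpxchg used by the generic
// RMW-to-cmpxchg loop; the failure ordering is derived from the success
// ordering.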
static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
                                 Value *Loaded, Value *NewVal, Align AddrAlign,
                                 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
                                 Value *&Success, Value *&NewLoaded) {
  Value *Pair = Builder.CreateAtomicCmpXchg(
      Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
      AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
  Success = Builder.CreateExtractValue(Pair, 1, "success");
  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
}
bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
  LLVMContext &Ctx = AI->getModule()->getContext();
  switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
    unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
    unsigned ValueSize = getAtomicOpSize(AI);
    if (ValueSize < MinCASSize) {
      expandPartwordAtomicRMW(AI,
                              TargetLoweringBase::AtomicExpansionKind::LLSC);
    } else {
      auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
        return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
                                   AI->getValOperand());
      };
      expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
                           AI->getAlign(), AI->getOrdering(), PerformOp);
    }
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
    unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
    unsigned ValueSize = getAtomicOpSize(AI);
    if (ValueSize < MinCASSize) {
      expandPartwordAtomicRMW(AI,
                              TargetLoweringBase::AtomicExpansionKind::CmpXChg);
    } else {
      SmallVector<StringRef> SSNs;
      Ctx.getSyncScopeNames(SSNs);
      auto MemScope = SSNs[AI->getSyncScopeID()].empty()
                          ? "system"
                          : SSNs[AI->getSyncScopeID()];
      OptimizationRemarkEmitter ORE(AI->getFunction());
      ORE.emit([&]() {
        return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
               << "A compare and swap loop was generated for an atomic "
               << AtomicRMWInst::getOperationName(AI->getOperation())
               << " operation at " << MemScope << " memory scope";
      });
      expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
    }
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
    expandAtomicRMWToMaskedIntrinsic(AI);
    return true;
  case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic:
    TLI->emitBitTestAtomicRMWIntrinsic(AI);
    return true;
  case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic:
    TLI->emitCmpArithAtomicRMWIntrinsic(AI);
    return true;
  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
    return lowerAtomicRMWInst(AI);
  case TargetLoweringBase::AtomicExpansionKind::Expand:
    TLI->emitExpandAtomicRMW(AI);
    return true;
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
  }
}
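// Everything needed to operate on a sub-word value that lives inside an
// aligned machine word: the word-aligned address, the bit offset of the
// subword within the word, and Mask/Inv_Mask for isolating or clearing those
// bits.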
struct PartwordMaskValues {
  // These three fields are guaranteed to be set by createMaskInstrs.
  Type *WordType = nullptr;
  Type *ValueType = nullptr;
  Type *IntValueType = nullptr;
  Value *AlignedAddr = nullptr;
  Align AlignedAddrAlignment;
  // The remaining fields can be null.
  Value *ShiftAmt = nullptr;
  Value *Mask = nullptr;
  Value *Inv_Mask = nullptr;
};
LLVM_ATTRIBUTE_UNUSED
raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
  auto PrintObj = [&O](auto *V) {
    if (V)
      O << *V;
    else
      O << "nullptr";
    O << '\n';
  };
  O << "PartwordMaskValues {\n";
  O << "  WordType: ";
  PrintObj(PMV.WordType);
  O << "  ValueType: ";
  PrintObj(PMV.ValueType);
  O << "  AlignedAddr: ";
  PrintObj(PMV.AlignedAddr);
  O << "  AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
  O << "  ShiftAmt: ";
  PrintObj(PMV.ShiftAmt);
  O << "  Mask: ";
  PrintObj(PMV.Mask);
  O << "  Inv_Mask: ";
  PrintObj(PMV.Inv_Mask);
  O << "}\n";
  return O;
}
static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
                                           Instruction *I, Type *ValueType,
                                           Value *Addr, Align AddrAlign,
                                           unsigned MinWordSize) {
  PartwordMaskValues PMV;

  Module *M = I->getModule();
  LLVMContext &Ctx = M->getContext();
  const DataLayout &DL = M->getDataLayout();
  unsigned ValueSize = DL.getTypeStoreSize(ValueType);

  PMV.ValueType = PMV.IntValueType = ValueType;
  if (PMV.ValueType->isFloatingPointTy())
    PMV.IntValueType =
        Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());

  PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
                                         : ValueType;
  if (PMV.ValueType == PMV.WordType) {
    PMV.AlignedAddr = Addr;
    PMV.AlignedAddrAlignment = AddrAlign;
    PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
    PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
    return PMV;
  }

  PMV.AlignedAddrAlignment = Align(MinWordSize);

  assert(ValueSize < MinWordSize);

  PointerType *PtrTy = cast<PointerType>(Addr->getType());
  IntegerType *IntTy = DL.getIntPtrType(Ctx, PtrTy->getAddressSpace());
  Value *PtrLSB;

  if (AddrAlign < MinWordSize) {
    PMV.AlignedAddr = Builder.CreateIntrinsic(
        Intrinsic::ptrmask, {PtrTy, IntTy},
        {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
        "AlignedAddr");

    Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
    PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
  } else {
    // The alignment guarantees the low bits of the address are zero.
    PMV.AlignedAddr = Addr;
    PtrLSB = ConstantInt::getNullValue(IntTy);
  }

  if (DL.isLittleEndian()) {
    // turn bytes into bits
    PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
  } else {
    // turn bytes into bits, and count from the other side.
    PMV.ShiftAmt = Builder.CreateShl(
        Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
  }

  PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
  PMV.Mask = Builder.CreateShl(
      ConstantInt::get(PMV.WordType, (uint64_t(1) << (ValueSize * 8)) - 1),
      PMV.ShiftAmt, "Mask");

  PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
  return PMV;
}
static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
                                 const PartwordMaskValues &PMV) {
  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
  if (PMV.WordType == PMV.ValueType)
    return WideWord;

  Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
  Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
  return Builder.CreateBitCast(Trunc, PMV.ValueType);
}

static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
                                Value *Updated, const PartwordMaskValues &PMV) {
  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
  assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
  if (PMV.WordType == PMV.ValueType)
    return Updated;

  Updated = Builder.CreateBitCast(Updated, PMV.IntValueType);

  Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
  Value *Shift =
      Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
  Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "masked");
  return Builder.CreateOr(And, Shift, "inserted");
}
static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
                                    IRBuilderBase &Builder, Value *Loaded,
                                    Value *Shifted_Inc, Value *Inc,
                                    const PartwordMaskValues &PMV) {
  switch (Op) {
  case AtomicRMWInst::Xchg: {
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
    return FinalVal;
  }
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::Nand: {
    // Compute on the full word, then mask the result back into place.
    Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
    Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
    return FinalVal;
  }
void AtomicExpand::expandPartwordAtomicRMW(
    AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
  AtomicOrdering MemOpOrder = AI->getOrdering();
  SyncScope::ID SSID = AI->getSyncScopeID();

  ReplacementIRBuilder Builder(AI, *DL);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  Value *ValOperand_Shifted = nullptr;
  if (AI->getOperation() == AtomicRMWInst::Xchg ||
      AI->getOperation() == AtomicRMWInst::Add ||
      AI->getOperation() == AtomicRMWInst::Sub ||
      AI->getOperation() == AtomicRMWInst::Nand) {
    ValOperand_Shifted =
        Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
                          PMV.ShiftAmt, "ValOperand_Shifted");
  }

  auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
    return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded,
                                 ValOperand_Shifted, AI->getValOperand(), PMV);
  };

  Value *OldResult;
  if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
    OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
                                     PMV.AlignedAddrAlignment, MemOpOrder, SSID,
                                     PerformPartwordOp, createCmpXchgInstFun);
  } else {
    assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
    OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
                                  PMV.AlignedAddrAlignment, MemOpOrder,
                                  PerformPartwordOp);
  }

  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
}
AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
  ReplacementIRBuilder Builder(AI, *DL);
  AtomicRMWInst::BinOp Op = AI->getOperation();

  assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
          Op == AtomicRMWInst::And) &&
         "Unable to widen operation");

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  Value *ValOperand_Shifted =
      Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
                        PMV.ShiftAmt, "ValOperand_Shifted");

  Value *NewOperand;
  if (Op == AtomicRMWInst::And)
    NewOperand =
        Builder.CreateOr(PMV.Inv_Mask, ValOperand_Shifted, "AndOperand");
  else
    NewOperand = ValOperand_Shifted;

  AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
      Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
      AI->getOrdering(), AI->getSyncScopeID());

  Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
  return NewAI;
}
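// Note the And case above: the widened operand is OR'ed with Inv_Mask so the
// bits outside the subword are 1 and therefore left unchanged by the And;
// Or/Xor need no fixup because their extra bits are zero.

// A sub-word cmpxchg loops on the containing word, carrying the untouched
// bytes in a PHI (Loaded_MaskOut) so that a failure caused only by a
// neighboring byte changing simply retries.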
bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
  Value *NewVal = CI->getNewValOperand();
  Value *Cmp = CI->getCompareOperand();

  ReplacementIRBuilder Builder(CI, *DL);
  LLVMContext &Ctx = Builder.getContext();

  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();

  BasicBlock *EndBB =
      BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
  auto FailureBB =
      BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
  auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);

  // splitBasicBlock left a stray branch at the end of BB; remove it and build
  // the loop preheader by hand.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(),
                       CI->getPointerOperand(), CI->getAlign(),
                       TLI->getMinCmpXchgSizeInBits() / 8);

  // Shift the incoming values into the right position inside the word.
  Value *NewVal_Shifted =
      Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
  Value *Cmp_Shifted =
      Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);

  // Load the whole word once, and carry the bytes outside the subword in a
  // PHI across iterations.
  LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
  Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
  Builder.CreateBr(LoopBB);

  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
  Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);

  Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
  Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
  AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
      PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
      CI->getSuccessOrdering(), CI->getFailureOrdering(),
      CI->getSyncScopeID());

  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
  Value *Success = Builder.CreateExtractValue(NewCI, 1);
  Builder.CreateCondBr(Success, EndBB, FailureBB);

  // Retry only if the bytes outside the subword changed; a genuine compare
  // failure exits the loop.
  Builder.SetInsertPoint(FailureBB);
  Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
  Value *ShouldContinue =
      Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut, "should_continue");
  Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
  Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
void AtomicExpand::expandAtomicOpToLLSC(
    Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
  ReplacementIRBuilder Builder(I, *DL);
  Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
                                    MemOpOrder, PerformOp);

  I->replaceAllUsesWith(Loaded);
  I->eraseFromParent();
}
void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
  ReplacementIRBuilder Builder(AI, *DL);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // Sign-extend the value operand for signed min/max so the target's signed
  // comparisons see a correctly extended word; zero-extend otherwise.
  Instruction::CastOps CastOp = Instruction::ZExt;
  AtomicRMWInst::BinOp Op = AI->getOperation();
  if (Op == AtomicRMWInst::Max || Op == AtomicRMWInst::Min)
    CastOp = Instruction::SExt;

  Value *ValOperand_Shifted = Builder.CreateShl(
      Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
      PMV.ShiftAmt, "ValOperand_Shifted");
  Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
      Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
      AI->getOrdering());
  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
}
void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
  ReplacementIRBuilder Builder(CI, *DL);

  PartwordMaskValues PMV = createMaskInstrs(
      Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
      CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  Value *CmpVal_Shifted = Builder.CreateShl(
      Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
      "CmpVal_Shifted");
  Value *NewVal_Shifted = Builder.CreateShl(
      Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
      "NewVal_Shifted");
  Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
      Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
      CI->getMergedOrdering());
  Value *Success = Builder.CreateICmpEQ(
      CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
Value *AtomicExpand::insertRMWLLSCLoop(
    IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  assert(AddrAlign >=
             F->getParent()->getDataLayout().getTypeStoreSize(ResultTy) &&
         "Expected at least natural alignment at this point.");

  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // splitBasicBlock added a branch to ExitBB; replace it with a branch into
  // the loop.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  Builder.CreateBr(LoopBB);

  Builder.SetInsertPoint(LoopBB);
  Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);

  Value *NewVal = PerformOp(Builder, Loaded);

  Value *StoreSuccess =
      TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
  Value *TryAgain = Builder.CreateICmpNE(
      StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return Loaded;
}
AtomicCmpXchgInst *
AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
  auto *M = CI->getModule();
  Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
                                            M->getDataLayout());

  ReplacementIRBuilder Builder(CI, *DL);

  Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
  Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);

  auto *NewCI = Builder.CreateAtomicCmpXchg(
      CI->getPointerOperand(), NewCmp, NewNewVal, CI->getAlign(),
      CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID());
  NewCI->setVolatile(CI->isVolatile());
  NewCI->setWeak(CI->isWeak());
  LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
  AtomicOrdering FailureOrder = CI->getFailureOrdering();
  Value *Addr = CI->getPointerOperand();
  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();
  // If the target wants explicit fences, the instruction itself runs
  // monotonic; otherwise the merged ordering stays on the LL/SC pair.
  bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
  AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
                                  ? AtomicOrdering::Monotonic
                                  : CI->getMergedOrdering();

  // A strong cmpxchg can reload between the release fence and the
  // store-conditional, retrying without re-running the fence; that extra
  // block only pays off when not optimizing for size.
  bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
                           SuccessOrder != AtomicOrdering::Monotonic &&
                           SuccessOrder != AtomicOrdering::Acquire &&
                           !F->hasMinSize();

  // At minsize an unconditional release barrier up front is smaller than
  // branching around it.
  bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();

  BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
  auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
  auto ReleasedLoadBB =
      BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, NoStoreBB);
  auto TryStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
  auto ReleasingStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
  auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);

  ReplacementIRBuilder Builder(CI, *DL);

  // splitBasicBlock left a branch to ExitBB at the end of BB; replace it.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
                       CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
  Builder.CreateBr(StartBB);

  // start: load the current value and test it against the expected one.
  Builder.SetInsertPoint(StartBB);
  Value *UnreleasedLoad =
      TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
  Value *UnreleasedLoadExtract =
      extractMaskedValue(Builder, UnreleasedLoad, PMV);
  Value *ShouldStore = Builder.CreateICmpEQ(
      UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");
  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);

  Builder.SetInsertPoint(ReleasingStoreBB);
  if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(TryStoreBB);

  Builder.SetInsertPoint(TryStoreBB);
  PHINode *LoadedTryStore =
      Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
  LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
  Value *NewValueInsert =
      insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
  Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
                                                  PMV.AlignedAddr, MemOpOrder);
  StoreSuccess = Builder.CreateICmpEQ(
      StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
  BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
  Builder.CreateCondBr(StoreSuccess, SuccessBB,
                       CI->isWeak() ? FailureBB : RetryBB);

  Builder.SetInsertPoint(ReleasedLoadBB);
  Value *SecondLoad;
  if (HasReleasedLoadBB) {
    SecondLoad =
        TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
    Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
    ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
                                       CI->getCompareOperand(), "should_store");
    Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);

    LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
  } else
    Builder.CreateUnreachable();

  Builder.SetInsertPoint(SuccessBB);
  if (ShouldInsertFencesForAtomic ||
      TLI->shouldInsertTrailingFenceForAtomicStore(CI))
    TLI->emitTrailingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(ExitBB);

  Builder.SetInsertPoint(NoStoreBB);
  PHINode *LoadedNoStore =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
  LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
  if (HasReleasedLoadBB)
    LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);
  // On LL/SC targets the no-store path must still balance the load-linked.
  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
  Builder.CreateBr(FailureBB);

  Builder.SetInsertPoint(FailureBB);
  PHINode *LoadedFailure =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
  LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
  if (CI->isWeak())
    LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
  if (ShouldInsertFencesForAtomic)
    TLI->emitTrailingFence(Builder, CI, FailureOrder);
  Builder.CreateBr(ExitBB);

  // Expose the CFG-derived success bit and loaded value to later passes.
  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
  PHINode *LoadedExit =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
  LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
  LoadedExit->addIncoming(LoadedFailure, FailureBB);

  Value *LoadedFull = LoadedExit;
  Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
  Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);

  // Rewrite extractions of the original { iN, i1 } result.
  SmallVector<ExtractValueInst *, 2> PrunedInsts;
  for (auto *User : CI->users()) {
    ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
    if (!EV)
      continue;
    assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
           "weird extraction from { iN, i1 }");
    if (EV->getIndices()[0] == 0)
      EV->replaceAllUsesWith(Loaded);
    else
      EV->replaceAllUsesWith(Success);
    PrunedInsts.push_back(EV);
  }

  for (auto *EV : PrunedInsts)
    EV->eraseFromParent();
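// An atomicrmw is idempotent when it cannot change memory: add/sub/or/xor
// with 0, or and with -1. Such operations can be relaxed into a fenced load
// if the target provides lowerIdempotentRMWIntoFencedLoad.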
  case AtomicRMWInst::And:
    return C->isMinusOne();
bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
  if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
    tryExpandAtomicLoad(ResultingLoad);
    return true;
  }
  return false;
}
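// Generic compare-exchange retry loop. Condensed sketch of the emitted IR:
//
//     %init_loaded = load %addr
//     br label %atomicrmw.start
//   atomicrmw.start:
//     %loaded = phi [ %init_loaded, %entry ], [ %new_loaded, %atomicrmw.start ]
//     %new = <PerformOp>(%loaded)
//     %pair = cmpxchg %addr, %loaded, %new
//     %new_loaded = extractvalue %pair, 0
//     %success = extractvalue %pair, 1
//     br i1 %success, label %atomicrmw.end, label %atomicrmw.start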
Value *AtomicExpand::insertRMWCmpXchgLoop(
    IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder, SyncScope::ID SSID,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
    CreateCmpXchgInstFun CreateCmpXchg) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // Replace the stray branch from splitBasicBlock with the loop preheader:
  // a plain initial load of the current value.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
  Builder.CreateBr(LoopBB);

  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
  Loaded->addIncoming(InitLoaded, BB);

  Value *NewVal = PerformOp(Builder, Loaded);

  Value *NewLoaded = nullptr;
  Value *Success = nullptr;

  CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
                MemOpOrder == AtomicOrdering::Unordered
                    ? AtomicOrdering::Monotonic
                    : MemOpOrder,
                SSID, Success, NewLoaded);

  Loaded->addIncoming(NewLoaded, LoopBB);
  Builder.CreateCondBr(Success, ExitBB, LoopBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return NewLoaded;
}
bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
  unsigned ValueSize = getAtomicOpSize(CI);

  switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
  case TargetLoweringBase::AtomicExpansionKind::None:
    if (ValueSize < MinCASSize)
      return expandPartwordCmpXchg(CI);
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC:
    return expandAtomicCmpXchg(CI);
  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
    expandAtomicCmpXchgToMaskedIntrinsic(CI);
    return true;
  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
    return lowerAtomicCmpXchgInst(CI);
  }
}
  Builder.setIsFPConstrained(
      AI->getFunction()->hasFnAttribute(Attribute::StrictFP));

  Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
      Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
      AI->getOrdering(), AI->getSyncScopeID(),
      [&](IRBuilderBase &Builder, Value *Loaded) {
        return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
                                   AI->getValOperand());
      },
      CreateCmpXchg);
static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
                                  const DataLayout &DL) {
  unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
  return Alignment >= Size &&
         (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
         Size <= LargestSize;
}
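// The libcall expanders below map each operation to the generic __atomic_*
// call plus its sized variants (slot 0 is the generic form, slots 1-5 the
// 1/2/4/8/16-byte forms); expandAtomicOpToLibcall picks a sized form when
// canUseSizedAtomicCall allows, otherwise the generic one.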
void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_LOAD,   RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
      RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
  unsigned Size = getAtomicOpSize(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
      I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
  if (!expanded)
    report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load");
}
void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_STORE,   RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
      RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
  unsigned Size = getAtomicOpSize(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
      nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
  if (!expanded)
    report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store");
}
void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_COMPARE_EXCHANGE,   RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
      RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
      RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
  unsigned Size = getAtomicOpSize(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
      I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
      Libcalls);
  if (!expanded)
    report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS");
}
static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
  static const RTLIB::Libcall LibcallsXchg[6] = {
      RTLIB::ATOMIC_EXCHANGE,   RTLIB::ATOMIC_EXCHANGE_1,
      RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
      RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
  static const RTLIB::Libcall LibcallsAdd[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_ADD_1,
      RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
      RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
  static const RTLIB::Libcall LibcallsSub[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_SUB_1,
      RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
      RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
  static const RTLIB::Libcall LibcallsAnd[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_AND_1,
      RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
      RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
  static const RTLIB::Libcall LibcallsOr[6] = {
      RTLIB::UNKNOWN_LIBCALL,   RTLIB::ATOMIC_FETCH_OR_1,
      RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
      RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
  static const RTLIB::Libcall LibcallsXor[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_XOR_1,
      RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
      RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
  static const RTLIB::Libcall LibcallsNand[6] = {
      RTLIB::UNKNOWN_LIBCALL,     RTLIB::ATOMIC_FETCH_NAND_1,
      RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
      RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
  ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
  unsigned Size = getAtomicOpSize(I);

  bool Success = false;
  if (!Libcalls.empty())
    Success = expandAtomicOpToLibcall(
        I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
        nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);

  // Either there were no libcalls at all for this operation (min/max), or
  // only size-specialized ones applied and a generic one was needed: expand
  // to a CAS loop whose cmpxchg is itself lowered to a libcall.
  if (!Success) {
    expandAtomicRMWToCmpXchg(
        I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
                  Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
                  SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) {
          AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
              Addr, Loaded, NewVal, Alignment, MemOpOrder,
              AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
          Success = Builder.CreateExtractValue(Pair, 1, "success");
          NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
          // Expand the just-created CAS into a libcall.
          expandAtomicCASToLibcall(Pair);
        });
  }
}
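// The common lowering to a __atomic_* libcall. Sized variants pass the value
// directly; the generic form passes operands indirectly through stack
// temporaries, following the GCC/libatomic ABI, e.g.:
//   void __atomic_load(size_t size, void *ptr, void *ret, int ordering);
//   bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
//                                  void *desired, int success, int failure);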
bool AtomicExpand::expandAtomicOpToLibcall(
    Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
    Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
    AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
  LLVMContext &Ctx = I->getContext();
  Module *M = I->getModule();
  const DataLayout &DL = M->getDataLayout();
  IRBuilder<> Builder(I);
  IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());

  bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
  Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);

  const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);

  ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
  assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
  Constant *OrderingVal =
      ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
  Constant *Ordering2Val = nullptr;
  if (CASExpected) {
    assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
    Ordering2Val =
        ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
  }
  bool HasResult = I->getType() != Type::getVoidTy(Ctx);
  RTLIB::Libcall RTLibType;
  if (UseSizedLibcall) {
    switch (Size) {
    case 1:
      RTLibType = Libcalls[1];
      break;
    case 2:
      RTLibType = Libcalls[2];
      break;
    case 4:
      RTLibType = Libcalls[3];
      break;
    case 8:
      RTLibType = Libcalls[4];
      break;
    case 16:
      RTLibType = Libcalls[5];
      break;
    }
  } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
    RTLibType = Libcalls[0];
  } else {
    // No sized variant applies and there is no generic libcall for this
    // operation, so the caller must fall back (e.g. to a CAS loop).
    return false;
  }

  // If the target has not registered a name for this libcall, give up.
  if (!TLI->getLibcallName(RTLibType)) {
  SmallVector<Value *, 6> Args;
  AllocaInst *AllocaCASExpected = nullptr;
  AllocaInst *AllocaValue = nullptr;
  AllocaInst *AllocaResult = nullptr;

  // 'size' argument (generic calls only); getIntPtrType stands in for size_t.
  if (!UseSizedLibcall)
    Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));

  // 'ptr' argument.
  Value *PtrVal = PointerOperand;
  Args.push_back(PtrVal);

  // 'expected' argument, if present: passed through a stack temporary.
  if (CASExpected) {
    AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
    AllocaCASExpected->setAlignment(AllocaAlignment);
    Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
    Args.push_back(AllocaCASExpected);
  }

  // 'desired' argument: by value for sized calls, via a temporary otherwise.
  if (ValueOperand) {
    if (UseSizedLibcall) {
      Value *IntValue = Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
      Args.push_back(IntValue);
    } else {
      AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
      AllocaValue->setAlignment(AllocaAlignment);
      Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
      Args.push_back(AllocaValue);
    }
  }

  // 'ret' argument, for generic calls that produce a value.
  if (!CASExpected && HasResult && !UseSizedLibcall) {
    AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
    AllocaResult->setAlignment(AllocaAlignment);
    Args.push_back(AllocaResult);
  }

  // 'ordering' ('success_order' for cmpxchg) argument.
  Args.push_back(OrderingVal);
  // 'failure_order' argument, if present.
  if (Ordering2Val)
    Args.push_back(Ordering2Val);
  // Return type: i1 for cmpxchg, the sized integer for sized value-returning
  // calls, void otherwise.
  Type *ResultTy;
  AttributeList Attr;
  if (CASExpected) {
    ResultTy = Type::getInt1Ty(Ctx);
    Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
  } else if (HasResult && UseSizedLibcall)
    ResultTy = SizedIntTy;
  else
    ResultTy = Type::getVoidTy(Ctx);

  SmallVector<Type *, 6> ArgTys;
  for (Value *Arg : Args)
    ArgTys.push_back(Arg->getType());
  FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
  FunctionCallee LibcallFn =
      M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
  CallInst *Call = Builder.CreateCall(LibcallFn, Args);
  Call->setAttributes(Attr);
  Value *Result = Call;
  if (ValueOperand && !UseSizedLibcall)
    Builder.CreateLifetimeEnd(AllocaValue, SizeVal64);

  if (CASExpected) {
    // The final cmpxchg result is { load of the 'expected' alloca, bool result
    // from the call }.
    Type *FinalResultTy = I->getType();
    Value *V = PoisonValue::get(FinalResultTy);
    Value *ExpectedOut = Builder.CreateAlignedLoad(
        CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
    V = Builder.CreateInsertValue(V, ExpectedOut, 0);
    V = Builder.CreateInsertValue(V, Result, 1);
    I->replaceAllUsesWith(V);
  } else if (HasResult) {
    Value *V;
    if (UseSizedLibcall)
      V = Builder.CreateBitOrPointerCast(Result, I->getType());
    else
      V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
                                    AllocaAlignment);
    I->replaceAllUsesWith(V);
  }
  I->eraseFromParent();
  return true;
}