#define DEBUG_TYPE "atomic-expand"
  bool tryExpandAtomicLoad(LoadInst *LI);
  bool expandAtomicLoadToLL(LoadInst *LI);
  bool expandAtomicLoadToCmpXchg(LoadInst *LI);
  void expandAtomicOpToLLSC(/* ... */);
  void expandPartwordAtomicRMW(/* ... */);
  static Value *insertRMWCmpXchgLoop(/* ... */);
  void expandAtomicLoadToLibcall(LoadInst *LI);
  void expandAtomicStoreToLibcall(StoreInst *SI);
struct ReplacementIRBuilder : IRBuilder<InstSimplifyFolder> { /* ... */ };
char AtomicExpand::ID = 0;

// Helper functions to retrieve the size of atomic instructions.
static unsigned getAtomicOpSize(LoadInst *LI) {
  const DataLayout &DL = LI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(LI->getType());
}

static unsigned getAtomicOpSize(StoreInst *SI) {
  const DataLayout &DL = SI->getModule()->getDataLayout();
  return DL.getTypeStoreSize(SI->getValueOperand()->getType());
}
// Determine if a particular atomic operation has a supported size, and is of
// appropriate alignment, to be passed through for target lowering.
template <typename Inst>
static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
  unsigned Size = getAtomicOpSize(I);
  Align Alignment = I->getAlign();
  return Alignment >= Size &&
         Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
}
bool AtomicExpand::runOnFunction(Function &F) {
  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
  if (!TPC)
    return false;

  auto &TM = TPC->getTM<TargetMachine>();
  const auto *Subtarget = TM.getSubtargetImpl(F);
  if (!Subtarget->enableAtomicExpand())
    return false;
  TLI = Subtarget->getTargetLowering();
  DL = &F.getParent()->getDataLayout();

  // Changing control-flow while iterating through it is a bad idea, so gather
  // a list of all atomic instructions before we start.
  SmallVector<Instruction *, 1> AtomicInsts;
  for (Instruction &I : instructions(F))
    if (I.isAtomic() && !isa<FenceInst>(&I))
      AtomicInsts.push_back(&I);

  bool MadeChange = false;
  for (auto *I : AtomicInsts) {
    auto LI = dyn_cast<LoadInst>(I);
    auto SI = dyn_cast<StoreInst>(I);
    auto RMWI = dyn_cast<AtomicRMWInst>(I);
    auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
    assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");

    // If the size/alignment is not supported, replace with a libcall.
    if (LI) {
      if (!atomicSizeSupported(TLI, LI)) {
        expandAtomicLoadToLibcall(LI);
        MadeChange = true;
        continue;
      }
    } else if (SI) {
      if (!atomicSizeSupported(TLI, SI)) {
        expandAtomicStoreToLibcall(SI);
        MadeChange = true;
        continue;
      }
    } else if (RMWI) {
      if (!atomicSizeSupported(TLI, RMWI)) {
        expandAtomicRMWToLibcall(RMWI);
        MadeChange = true;
        continue;
      }
    } else if (CASI) {
      if (!atomicSizeSupported(TLI, CASI)) {
        expandAtomicCASToLibcall(CASI);
        MadeChange = true;
        continue;
      }
    }

    // If the target asks for it, convert non-integer atomics to integer form.
    if (LI && TLI->shouldCastAtomicLoadInIR(LI) ==
                  TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = LI = convertAtomicLoadToIntegerType(LI);
      MadeChange = true;
    } else if (SI &&
               TLI->shouldCastAtomicStoreInIR(SI) ==
                   TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = SI = convertAtomicStoreToIntegerType(SI);
      MadeChange = true;
    } else if (RMWI &&
               TLI->shouldCastAtomicRMWIInIR(RMWI) ==
                   TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = RMWI = convertAtomicXchgToIntegerType(RMWI);
      MadeChange = true;
    } else if (CASI) {
      if (CASI->getCompareOperand()->getType()->isPointerTy()) {
        // TODO: add a TLI hook to control this so that each target can
        // convert to lowering the original type one by one.
        I = CASI = convertCmpXchgToIntegerType(CASI);
        MadeChange = true;
      }
    }

    if (TLI->shouldInsertFencesForAtomic(I)) {
      auto FenceOrdering = AtomicOrdering::Monotonic;
      if (LI && isAcquireOrStronger(LI->getOrdering())) {
        FenceOrdering = LI->getOrdering();
        LI->setOrdering(AtomicOrdering::Monotonic);
      } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
        FenceOrdering = SI->getOrdering();
        SI->setOrdering(AtomicOrdering::Monotonic);
      } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
                          isAcquireOrStronger(RMWI->getOrdering()))) {
        FenceOrdering = RMWI->getOrdering();
        RMWI->setOrdering(AtomicOrdering::Monotonic);
      } else if (CASI &&
                 TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
                     TargetLoweringBase::AtomicExpansionKind::None &&
                 (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
                  isAcquireOrStronger(CASI->getSuccessOrdering()) ||
                  isAcquireOrStronger(CASI->getFailureOrdering()))) {
        // If a compare and swap is lowered to LL/SC, we can do smarter fence
        // insertion, with a stronger one on the success path than on the
        // failure path; that case is handled inside expandAtomicCmpXchg.
        FenceOrdering = CASI->getMergedOrdering();
        CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
        CASI->setFailureOrdering(AtomicOrdering::Monotonic);
      }

      if (FenceOrdering != AtomicOrdering::Monotonic) {
        MadeChange |= bracketInstWithFences(I, FenceOrdering);
      }
    } else if (I->hasAtomicStore() &&
               TLI->shouldInsertTrailingFenceForAtomicStore(I)) {
      auto FenceOrdering = AtomicOrdering::Monotonic;
      if (SI)
        FenceOrdering = SI->getOrdering();
      else if (RMWI)
        FenceOrdering = RMWI->getOrdering();
      else if (CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) !=
                           TargetLoweringBase::AtomicExpansionKind::LLSC)
        // LLSC is handled in expandAtomicCmpXchg().
        FenceOrdering = CASI->getSuccessOrdering();

      IRBuilder Builder(I);
      if (auto TrailingFence =
              TLI->emitTrailingFence(Builder, I, FenceOrdering)) {
        TrailingFence->moveAfter(I);
        MadeChange = true;
      }
    }

    if (LI)
      MadeChange |= tryExpandAtomicLoad(LI);
    else if (SI)
      MadeChange |= tryExpandAtomicStore(SI);
    else if (RMWI) {
      // There are two ways of expanding RMW instructions:
      // - into a load if it is idempotent,
      // - into a cmpxchg/LL-SC loop otherwise.
      // We try them in that order.
      if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
        MadeChange = true;
      } else {
        AtomicRMWInst::BinOp Op = RMWI->getOperation();
        unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
        unsigned ValueSize = getAtomicOpSize(RMWI);
        if (ValueSize < MinCASSize &&
            (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
             Op == AtomicRMWInst::And)) {
          RMWI = widenPartwordAtomicRMW(RMWI);
          MadeChange = true;
        }
        MadeChange |= tryExpandAtomicRMW(RMWI);
      }
    } else if (CASI)
      MadeChange |= tryExpandAtomicCmpXchg(CASI);
  }
  return MadeChange;
}
bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) {
  ReplacementIRBuilder Builder(I, *DL);

  auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);

  auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
  // We have a guard here because not every atomic operation generates a
  // trailing fence.
  if (TrailingFence)
    TrailingFence->moveAfter(I);

  return (LeadingFence || TrailingFence);
}
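// Illustrative example (a sketch, not verbatim pass output): on a target
// whose shouldInsertFencesForAtomic() hook returns true, the driver loop
// strips the ordering from the access and lets the target bracket it with
// explicit fences, e.g.
//   %v = load atomic i32, ptr %p acquire, align 4
// becomes roughly
//   %v = load atomic i32, ptr %p monotonic, align 4
//   fence acquire   ; whatever TLI->emitTrailingFence() produces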
/// Get the iX type with the same bitwidth as T.
IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
                                                       const DataLayout &DL) {
  EVT VT = TLI->getMemValueType(DL, T);
  unsigned BitWidth = VT.getStoreSizeInBits();
  assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
  return IntegerType::get(T->getContext(), BitWidth);
}
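// The conversion below turns, e.g. (illustrative IR),
//   %v = load atomic float, ptr %p seq_cst, align 4
// into
//   %1 = load atomic i32, ptr %p seq_cst, align 4
//   %v = bitcast i32 %1 to float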
/// Convert an atomic load of a non-integral type to an integer load of the
/// equivalent bitwidth. See the function comment on
/// convertAtomicStoreToIntegerType for background.
LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
  auto *M = LI->getModule();
  Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());

  ReplacementIRBuilder Builder(LI, *DL);

  Value *Addr = LI->getPointerOperand();
  Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace());
  Value *NewAddr = Builder.CreateBitCast(Addr, PT);

  auto *NewLI = Builder.CreateLoad(NewTy, NewAddr);
  NewLI->setAlignment(LI->getAlign());
  NewLI->setVolatile(LI->isVolatile());
  NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
  LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");

  Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
  LI->replaceAllUsesWith(NewVal);
  LI->eraseFromParent();
  return NewLI;
}
AtomicRMWInst *
AtomicExpand::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
  auto *M = RMWI->getModule();
  Type *NewTy =
      getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());

  ReplacementIRBuilder Builder(RMWI, *DL);

  Value *Addr = RMWI->getPointerOperand();
  Value *Val = RMWI->getValOperand();
  Value *NewVal = Val->getType()->isPointerTy()
                      ? Builder.CreatePtrToInt(Val, NewTy)
                      : Builder.CreateBitCast(Val, NewTy);

  auto *NewRMWI =
      Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Addr, NewVal,
                              RMWI->getAlign(), RMWI->getOrdering());
  NewRMWI->setVolatile(RMWI->isVolatile());
  LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");

  Value *NewRVal = RMWI->getType()->isPointerTy()
                       ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
                       : Builder.CreateBitCast(NewRMWI, RMWI->getType());
  RMWI->replaceAllUsesWith(NewRVal);
  RMWI->eraseFromParent();
  return NewRMWI;
}
bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
  switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC:
    expandAtomicOpToLLSC(
        LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
        LI->getOrdering(),
        [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
    return true;
  case TargetLoweringBase::AtomicExpansionKind::LLOnly:
    return expandAtomicLoadToLL(LI);
  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
    return expandAtomicLoadToCmpXchg(LI);
  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
    LI->setAtomic(AtomicOrdering::NotAtomic);
    return true;
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
  }
}
bool AtomicExpand::tryExpandAtomicStore(StoreInst *SI) {
  switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::Expand:
    expandAtomicStore(SI);
    return true;
  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
    SI->setAtomic(AtomicOrdering::NotAtomic);
    return true;
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicStore");
  }
}
bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
  ReplacementIRBuilder Builder(LI, *DL);

  // On some architectures, load-linked instructions are atomic for larger
  // sizes than normal loads. For example, the only 64-bit load guaranteed
  // to be single-copy atomic by ARM is an ldrexd.
  Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
                                   LI->getPointerOperand(), LI->getOrdering());
  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);

  LI->replaceAllUsesWith(Val);
  LI->eraseFromParent();

  return true;
}
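// The next expansion emulates an atomic load with a compare-exchange of a
// dummy value: a cmpxchg of 0 against 0 compares the location with zero and,
// at most, rewrites the same zero back, so memory is never visibly modified,
// while the first element of the result pair is an atomic read of the
// location.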
bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
  ReplacementIRBuilder Builder(LI, *DL);
  AtomicOrdering Order = LI->getOrdering();
  if (Order == AtomicOrdering::Unordered)
    Order = AtomicOrdering::Monotonic;

  Value *Addr = LI->getPointerOperand();
  Type *Ty = LI->getType();
  Constant *DummyVal = Constant::getNullValue(Ty);

  Value *Pair = Builder.CreateAtomicCmpXchg(
      Addr, DummyVal, DummyVal, LI->getAlign(), Order,
      AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
  Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");

  LI->replaceAllUsesWith(Loaded);
  LI->eraseFromParent();

  return true;
}
/// Convert an atomic store of a non-integral type to an integer store of the
/// equivalent bitwidth, so targets only have to deal with the integer form.
StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
  ReplacementIRBuilder Builder(SI, *DL);
  auto *M = SI->getModule();
  Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
                                            M->getDataLayout());
  Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);

  Value *Addr = SI->getPointerOperand();
  Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace());
  Value *NewAddr = Builder.CreateBitCast(Addr, PT);

  StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
  NewSI->setAlignment(SI->getAlign());
  NewSI->setVolatile(SI->isVolatile());
  NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
  LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
  SI->eraseFromParent();
  return NewSI;
}
void AtomicExpand::expandAtomicStore(StoreInst *SI) {
  // This function is only called on atomic stores that are too large to be
  // atomic if implemented as a native store, so we replace them by an atomic
  // swap which can be implemented, for example, as ldrex/strex on ARM or
  // lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
  ReplacementIRBuilder Builder(SI, *DL);
  AtomicOrdering Ordering = SI->getOrdering();
  assert(Ordering != AtomicOrdering::NotAtomic);
  AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
                                   ? AtomicOrdering::Monotonic
                                   : Ordering;
  AtomicRMWInst *AI = Builder.CreateAtomicRMW(
      AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
      SI->getAlign(), RMWOrdering);
  SI->eraseFromParent();

  // Now we have an appropriate swap instruction, RMW-expand it as usual.
  tryExpandAtomicRMW(AI);
}
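// Sketch of the rewrite:
//   store atomic i64 %v, ptr %p seq_cst, align 8
// becomes
//   %old = atomicrmw xchg ptr %p, i64 %v seq_cst   ; %old is dead
// which tryExpandAtomicRMW then lowers further (e.g. into an LL/SC loop).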
static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
                                 Value *Loaded, Value *NewVal, Align AddrAlign,
                                 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
                                 Value *&Success, Value *&NewLoaded) {
  Type *OrigTy = NewVal->getType();

  // This code can go away when cmpxchg supports FP types.
  assert(!OrigTy->isPointerTy());
  bool NeedBitcast = OrigTy->isFloatingPointTy();
  if (NeedBitcast) {
    IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
    unsigned AS = Addr->getType()->getPointerAddressSpace();
    Addr = Builder.CreateBitCast(Addr, IntTy->getPointerTo(AS));
    NewVal = Builder.CreateBitCast(NewVal, IntTy);
    Loaded = Builder.CreateBitCast(Loaded, IntTy);
  }

  Value *Pair = Builder.CreateAtomicCmpXchg(
      Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
      AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder), SSID);
  Success = Builder.CreateExtractValue(Pair, 1, "success");
  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");

  if (NeedBitcast)
    NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
}
bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
  LLVMContext &Ctx = AI->getModule()->getContext();
  switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
    unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
    unsigned ValueSize = getAtomicOpSize(AI);
    if (ValueSize < MinCASSize) {
      expandPartwordAtomicRMW(AI,
                              TargetLoweringBase::AtomicExpansionKind::LLSC);
    } else {
      auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
        return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
                                   AI->getValOperand());
      };
      expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
                           AI->getAlign(), AI->getOrdering(), PerformOp);
    }
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
    unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
    unsigned ValueSize = getAtomicOpSize(AI);
    if (ValueSize < MinCASSize) {
      expandPartwordAtomicRMW(AI,
                              TargetLoweringBase::AtomicExpansionKind::CmpXChg);
    } else {
      SmallVector<StringRef> SSNs;
      Ctx.getSyncScopeNames(SSNs);
      auto MemScope = SSNs[AI->getSyncScopeID()].empty()
                          ? "system"
                          : SSNs[AI->getSyncScopeID()];
      OptimizationRemarkEmitter ORE(AI->getFunction());
      ORE.emit([&]() {
        return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
               << "A compare and swap loop was generated for an atomic "
               << AI->getOperationName(AI->getOperation()) << " operation at "
               << MemScope << " memory scope";
      });
      expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
    }
    return true;
  }
  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
    expandAtomicRMWToMaskedIntrinsic(AI);
    return true;
  case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic:
    TLI->emitBitTestAtomicRMWIntrinsic(AI);
    return true;
  case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic:
    TLI->emitCmpArithAtomicRMWIntrinsic(AI);
    return true;
  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
    return lowerAtomicRMWInst(AI);
  case TargetLoweringBase::AtomicExpansionKind::Expand:
    TLI->emitExpandAtomicRMW(AI);
    return true;
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
  }
}
namespace {
struct PartwordMaskValues {
  // These three fields are guaranteed to be set by createMaskInstrs.
  Type *WordType = nullptr;
  Type *ValueType = nullptr;
  Type *IntValueType = nullptr;
  Value *AlignedAddr = nullptr;
  Align AlignedAddrAlignment;
  // The remaining fields can be null.
  Value *ShiftAmt = nullptr;
  Value *Mask = nullptr;
  Value *Inv_Mask = nullptr;
};

LLVM_ATTRIBUTE_UNUSED
raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
  auto PrintObj = [&O](auto *V) {
    if (V)
      O << *V;
    else
      O << "nullptr";
    O << '\n';
  };
  O << "PartwordMaskValues {\n";
  O << "  WordType: ";
  PrintObj(PMV.WordType);
  O << "  ValueType: ";
  PrintObj(PMV.ValueType);
  O << "  AlignedAddr: ";
  PrintObj(PMV.AlignedAddr);
  O << "  AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
  O << "  ShiftAmt: ";
  PrintObj(PMV.ShiftAmt);
  O << "  Mask: ";
  PrintObj(PMV.Mask);
  O << "  Inv_Mask: ";
  PrintObj(PMV.Inv_Mask);
  O << "}\n";
  return O;
}
} // end anonymous namespace
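// Worked example for the helper below (little-endian, MinWordSize = 4): for
// an i8 at address A = 0x1003 it produces
//   AlignedAddr = A & ~3     = 0x1000
//   PtrLSB      = A & 3      = 3
//   ShiftAmt    = 3 * 8      = 24
//   Mask        = 0xFF << 24 = 0xFF000000
//   Inv_Mask    = ~Mask      = 0x00FFFFFF
// i.e. the i8 occupies the top byte of its containing i32 word.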
/// This is a helper function which builds instructions to provide values
/// necessary for partword atomic operations. It takes an incoming address,
/// Addr, and ValueType, and constructs the aligned address, shift amount, and
/// masks needed to work with a larger value of size MinWordSize.
static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
                                           Instruction *I, Type *ValueType,
                                           Value *Addr, Align AddrAlign,
                                           unsigned MinWordSize) {
  PartwordMaskValues PMV;

  Module *M = I->getModule();
  LLVMContext &Ctx = M->getContext();
  const DataLayout &DL = M->getDataLayout();
  unsigned ValueSize = DL.getTypeStoreSize(ValueType);

  PMV.ValueType = PMV.IntValueType = ValueType;
  if (PMV.ValueType->isFloatingPointTy())
    PMV.IntValueType =
        Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());

  PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
                                         : ValueType;
  if (PMV.ValueType == PMV.WordType) {
    // The value already fills a whole word; no masking or shifting needed.
    PMV.AlignedAddr = Addr;
    PMV.AlignedAddrAlignment = AddrAlign;
    PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
    PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
    return PMV;
  }

  PMV.AlignedAddrAlignment = Align(MinWordSize);

  assert(ValueSize < MinWordSize);

  PointerType *PtrTy = cast<PointerType>(Addr->getType());
  IntegerType *IntTy = DL.getIntPtrType(Ctx, PtrTy->getAddressSpace());
  Value *PtrLSB;

  if (AddrAlign < MinWordSize) {
    PMV.AlignedAddr = Builder.CreateIntrinsic(
        Intrinsic::ptrmask, {PtrTy, IntTy},
        {Addr, ConstantInt::get(IntTy, ~(uint64_t)(MinWordSize - 1))}, nullptr,
        "AlignedAddr");

    Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
    PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
  } else {
    // If the alignment is high enough, the address is already aligned.
    PMV.AlignedAddr = Addr;
    PtrLSB = ConstantInt::getNullValue(IntTy);
  }

  if (DL.isLittleEndian()) {
    // turn bytes into bits
    PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
  } else {
    // turn bytes into bits, and count from the other side.
    PMV.ShiftAmt = Builder.CreateShl(
        Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
  }

  PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
  PMV.Mask = Builder.CreateShl(
      ConstantInt::get(PMV.WordType, (uint64_t(1) << (ValueSize * 8)) - 1),
      PMV.ShiftAmt, "Mask");

  PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");

  Type *WordPtrType =
      PMV.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace());
  PMV.AlignedAddr =
      Builder.CreateBitCast(PMV.AlignedAddr, WordPtrType, "AlignedAddr");
  return PMV;
}
static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
                                 const PartwordMaskValues &PMV) {
  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
  if (PMV.WordType == PMV.ValueType)
    return WideWord;

  Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
  Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
  return Builder.CreateBitCast(Trunc, PMV.ValueType);
}
static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
                                Value *Updated, const PartwordMaskValues &PMV) {
  assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
  assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
  if (PMV.WordType == PMV.ValueType)
    return Updated;

  Updated = Builder.CreateBitCast(Updated, PMV.IntValueType);

  Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
  Value *Shift =
      Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
  Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
  return Builder.CreateOr(And, Shift, "inserted");
}
/// Emit IR to implement a masked version of a given atomicrmw operation:
/// only the bits under PMV.Mask should be affected by the operation.
static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
                                    IRBuilderBase &Builder, Value *Loaded,
                                    Value *Shifted_Inc, Value *Inc,
                                    const PartwordMaskValues &PMV) {
  switch (Op) {
  case AtomicRMWInst::Xchg: {
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
    return FinalVal;
  }
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
  case AtomicRMWInst::And:
    llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::Nand: {
    // The other arithmetic ops need to be masked into place.
    Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
    Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
    return FinalVal;
  }
  // ... (min/max and FP operations extract the old value, operate at full
  // width, and re-insert the result via insertMaskedValue)
  default:
    llvm_unreachable("Unknown atomic op");
  }
}
/// Expand a sub-word atomicrmw operation into an appropriate word-sized
/// operation, wrapped in an LL/SC or cmpxchg loop as requested.
void AtomicExpand::expandPartwordAtomicRMW(
    AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
  AtomicOrdering MemOpOrder = AI->getOrdering();
  SyncScope::ID SSID = AI->getSyncScopeID();

  ReplacementIRBuilder Builder(AI, *DL);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  Value *ValOperand_Shifted = nullptr;
  if (AI->getOperation() == AtomicRMWInst::Xchg ||
      AI->getOperation() == AtomicRMWInst::Add ||
      AI->getOperation() == AtomicRMWInst::Sub ||
      AI->getOperation() == AtomicRMWInst::Nand)
    ValOperand_Shifted =
        Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
                          PMV.ShiftAmt, "ValOperand_Shifted");

  auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
    return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded,
                                 ValOperand_Shifted, AI->getValOperand(), PMV);
  };

  Value *OldResult;
  if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
    OldResult = insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr,
                                     PMV.AlignedAddrAlignment, MemOpOrder, SSID,
                                     PerformPartwordOp, createCmpXchgInstFun);
  } else {
    assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
    OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
                                  PMV.AlignedAddrAlignment, MemOpOrder,
                                  PerformPartwordOp);
  }

  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
}
909 "Unable to widen operation");
911 PartwordMaskValues PMV =
913 AI->
getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
915 Value *ValOperand_Shifted =
917 PMV.ShiftAmt,
"ValOperand_Shifted");
923 Builder.CreateOr(PMV.Inv_Mask, ValOperand_Shifted,
"AndOperand");
925 NewOperand = ValOperand_Shifted;
928 Builder.CreateAtomicRMW(Op, PMV.AlignedAddr, NewOperand,
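// Why 'and' is special-cased above: the word-wide RMW must leave the bits
// outside the mask unchanged. For or/xor the shifted operand is already zero
// outside the mask (zext + shl) and or/xor with 0 is the identity; for 'and'
// the identity is 1, so the Inv_Mask bits are OR'ed in. E.g. (little-endian,
// illustrative) an "atomicrmw and" of i8 0x0F at byte offset 3 becomes a
// word-wide and with operand (0x0F << 24) | 0x00FFFFFF = 0x0FFFFFFF.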
bool AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
  // The basic idea here is that we're expanding a cmpxchg of a smaller memory
  // size up to a word-sized cmpxchg. To do this, we need to add a retry loop
  // for strong cmpxchg, so that modifications to other parts of the word
  // don't cause a spurious failure.

  Value *Addr = CI->getPointerOperand();
  Value *Cmp = CI->getCompareOperand();
  Value *NewVal = CI->getNewValOperand();

  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  ReplacementIRBuilder Builder(CI, *DL);
  LLVMContext &Ctx = Builder.getContext();

  BasicBlock *EndBB =
      BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
  auto FailureBB =
      BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
  auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
                       CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // Shift the incoming values over, into the right location in the word.
  Value *NewVal_Shifted =
      Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
  Value *Cmp_Shifted =
      Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);

  // Load the entire current word, and mask out the bits we will replace.
  LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
  InitLoaded->setVolatile(CI->isVolatile());
  Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
  Builder.CreateBr(LoopBB);

  // partword.cmpxchg.loop:
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
  Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);

  // Mask/Or the expected and new values into place in the loaded word.
  Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
  Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
  AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
      PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
      CI->getSuccessOrdering(), CI->getFailureOrdering(), CI->getSyncScopeID());
  NewCI->setVolatile(CI->isVolatile());
  NewCI->setWeak(CI->isWeak());

  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
  Value *Success = Builder.CreateExtractValue(NewCI, 1);

  if (CI->isWeak())
    Builder.CreateBr(EndBB);
  else
    Builder.CreateCondBr(Success, EndBB, FailureBB);

  // partword.cmpxchg.failure:
  Builder.SetInsertPoint(FailureBB);
  // Upon failure, verify that the masked-out part of the loaded value has
  // been modified. If it hasn't, abort the cmpxchg, since the masked-in part
  // must have failed the comparison.
  Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
  Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
  Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);

  // Add the second value to the phi from above.
  Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);

  // partword.cmpxchg.end:
  Builder.SetInsertPoint(CI);
  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
  Value *Res = PoisonValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
  Res = Builder.CreateInsertValue(Res, Success, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return true;
}
void AtomicExpand::expandAtomicOpToLLSC(
    Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
  ReplacementIRBuilder Builder(I, *DL);
  Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
                                    MemOpOrder, PerformOp);

  I->replaceAllUsesWith(Loaded);
  I->eraseFromParent();
}
void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
  ReplacementIRBuilder Builder(AI, *DL);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // The value operand must be sign-extended for signed min/max so that the
  // target's signed comparison instructions can be used. Otherwise, just
  // zero-extend.
  Instruction::CastOps CastOp = Instruction::ZExt;
  AtomicRMWInst::BinOp RMWOp = AI->getOperation();
  if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
    CastOp = Instruction::SExt;

  Value *ValOperand_Shifted = Builder.CreateShl(
      Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
      PMV.ShiftAmt, "ValOperand_Shifted");
  Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
      Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
      AI->getOrdering());
  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
}
void AtomicExpand::expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI) {
  ReplacementIRBuilder Builder(CI, *DL);

  PartwordMaskValues PMV = createMaskInstrs(
      Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
      CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  Value *CmpVal_Shifted = Builder.CreateShl(
      Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
      "CmpVal_Shifted");
  Value *NewVal_Shifted = Builder.CreateShl(
      Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
      "NewVal_Shifted");
  Value *OldVal = TLI->emitMaskedAtomicCmpXchgIntrinsic(
      Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
      CI->getMergedOrdering());
  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
  Value *Res = PoisonValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
  Value *Success = Builder.CreateICmpEQ(
      CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
  Res = Builder.CreateInsertValue(Res, Success, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
}
Value *AtomicExpand::insertRMWLLSCLoop(
    IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  assert(AddrAlign >=
             F->getParent()->getDataLayout().getTypeStoreSize(ResultTy) &&
         "Expected at least natural alignment at this point.");

  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);

  Value *NewVal = PerformOp(Builder, Loaded);

  Value *StoreSuccess =
      TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
  Value *TryAgain = Builder.CreateICmpNE(
      StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return Loaded;
}
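// The loop built above corresponds to this sketch (illustrative; the actual
// load-linked/store-conditional forms come from the target hooks):
//   atomicrmw.start:
//     %loaded = @load.linked(%addr)
//     %new = some_op iN %loaded, %incr
//     %stored = @store_conditional(%new, %addr)
//     %try_again = icmp ne %stored, 0
//     br i1 %try_again, label %atomicrmw.start, label %atomicrmw.end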
/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
/// the equivalent bitwidth, so backends can be migrated to pointer cmpxchg
/// one by one.
AtomicCmpXchgInst *
AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
  auto *M = CI->getModule();
  Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
                                            M->getDataLayout());

  ReplacementIRBuilder Builder(CI, *DL);

  Value *Addr = CI->getPointerOperand();
  Type *PT = PointerType::get(NewTy, Addr->getType()->getPointerAddressSpace());
  Value *NewAddr = Builder.CreateBitCast(Addr, PT);

  Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
  Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);

  auto *NewCI = Builder.CreateAtomicCmpXchg(
      NewAddr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
      CI->getFailureOrdering(), CI->getSyncScopeID());
  NewCI->setVolatile(CI->isVolatile());
  NewCI->setWeak(CI->isWeak());
  LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");

  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
  Value *Succ = Builder.CreateExtractValue(NewCI, 1);

  OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());

  Value *Res = PoisonValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, OldVal, 0);
  Res = Builder.CreateInsertValue(Res, Succ, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return NewCI;
}
bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
  AtomicOrdering FailureOrder = CI->getFailureOrdering();
  Value *Addr = CI->getPointerOperand();
  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();
  // If shouldInsertFencesForAtomic() returns true, then the target does not
  // want to deal with memory orders, and emitLeading/TrailingFence should
  // take care of everything. Otherwise, emitLeading/TrailingFence are no-ops
  // and we should preserve the ordering.
  bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
  AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
                                  ? AtomicOrdering::Monotonic
                                  : CI->getMergedOrdering();

  // In implementations which use a barrier to achieve release semantics, we
  // can delay emitting this barrier until we know a store is actually going
  // to be attempted.
  bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
                           SuccessOrder != AtomicOrdering::Monotonic &&
                           SuccessOrder != AtomicOrdering::Acquire &&
                           !F->hasMinSize();

  // There's no overhead for sinking the release barrier in a weak cmpxchg, so
  // we can deal with size-minimization by placing it unconditionally before
  // the first load.
  bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();
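  // The resulting control flow, roughly (condensed from the block comment in
  // the original source; fences shown only where conditionally emitted):
  //   cmpxchg.start:        %loaded = @load.linked(%addr)
  //                         equal to %desired ? fencedstore : nostore
  //   cmpxchg.fencedstore:  leading (release) fence, if needed
  //   cmpxchg.trystore:     %stored = @store_conditional(%new, %addr)
  //                         success -> cmpxchg.success
  //                         failure -> retry (strong) or cmpxchg.failure
  //   cmpxchg.releasedload: strong-cmpxchg retry path that reloads and
  //                         re-checks without re-emitting the fence
  //   cmpxchg.success/.failure: trailing fences, then merge in cmpxchg.end
  //                         with PHIs for the loaded value and i1 success.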
  BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
  auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
  auto ReleasedLoadBB =
      BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
  auto TryStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
  auto ReleasingStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
  auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);

  ReplacementIRBuilder Builder(CI, *DL);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
                       CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
  Builder.CreateBr(StartBB);

  // Start the main flow of the cmpxchg.
  Builder.SetInsertPoint(StartBB);
  Value *UnreleasedLoad =
      TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
  Value *UnreleasedLoadExtract =
      extractMaskedValue(Builder, UnreleasedLoad, PMV);
  Value *ShouldStore = Builder.CreateICmpEQ(
      UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");

  // If the cmpxchg doesn't actually need any ordering when it fails, we can
  // jump straight past the fence instruction (if it exists).
  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);

  Builder.SetInsertPoint(ReleasingStoreBB);
  if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(TryStoreBB);

  Builder.SetInsertPoint(TryStoreBB);
  PHINode *LoadedTryStore =
      Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
  LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
  Value *NewValueInsert =
      insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
  Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
                                                  PMV.AlignedAddr, MemOpOrder);
  StoreSuccess = Builder.CreateICmpEQ(
      StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
  BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
  Builder.CreateCondBr(StoreSuccess, SuccessBB,
                       CI->isWeak() ? FailureBB : RetryBB);

  Builder.SetInsertPoint(ReleasedLoadBB);
  Value *SecondLoad = nullptr;
  if (HasReleasedLoadBB) {
    SecondLoad =
        TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
    Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
    ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
                                       CI->getCompareOperand(), "should_store");

    Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
    // Update PHI node in TryStoreBB.
    LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
  } else
    Builder.CreateUnreachable();

  // Make sure later instructions don't get reordered with a fence if
  // necessary.
  Builder.SetInsertPoint(SuccessBB);
  if (ShouldInsertFencesForAtomic ||
      TLI->shouldInsertTrailingFenceForAtomicStore(CI))
    TLI->emitTrailingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(ExitBB);

  Builder.SetInsertPoint(NoStoreBB);
  PHINode *LoadedNoStore =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
  LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
  if (HasReleasedLoadBB)
    LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);

  // In the failing case, where we don't execute the store-conditional, the
  // target might want to balance out the load-linked with a dedicated
  // instruction (e.g., on ARM, clearing the exclusive monitor).
  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
  Builder.CreateBr(FailureBB);

  Builder.SetInsertPoint(FailureBB);
  PHINode *LoadedFailure =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
  LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
  if (CI->isWeak())
    LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
  if (ShouldInsertFencesForAtomic)
    TLI->emitTrailingFence(Builder, CI, FailureOrder);
  Builder.CreateBr(ExitBB);

  // Finally, we have control flow based knowledge of whether the cmpxchg
  // succeeded or not, which we expose through PHIs in the exit block.
  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  PHINode *LoadedExit =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
  LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
  LoadedExit->addIncoming(LoadedFailure, FailureBB);
  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);

  // Set up the builder so we can create any PHIs we need.
  Value *LoadedFull = LoadedExit;
  Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
  Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);

  // Look for any users of the cmpxchg that are just extracting the loaded
  // value or the success flag, and replace them with the CFG-derived values.
  SmallVector<ExtractValueInst *, 2> PrunedInsts;
  for (auto *User : CI->users()) {
    ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
    if (!EV)
      continue;
    assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
           "weird extraction from { iN, i1 }");
    if (EV->getIndices()[0] == 0)
      EV->replaceAllUsesWith(Loaded);
    else
      EV->replaceAllUsesWith(Success);
    PrunedInsts.push_back(EV);
  }

  // We can remove the instructions now we're no longer iterating through them.
  for (auto *EV : PrunedInsts)
    EV->eraseFromParent();

  if (!CI->use_empty()) {
    // Some other use of the full struct return remains; reconstruct it.
    Value *Res;
    Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0);
    Res = Builder.CreateInsertValue(Res, Success, 1);
    CI->replaceAllUsesWith(Res);
  }

  CI->eraseFromParent();
  return true;
}
static bool isIdempotentRMW(AtomicRMWInst *RMWI) {
  auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
  if (!C)
    return false;

  switch (RMWI->getOperation()) {
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
    return C->isZero();
  case AtomicRMWInst::And:
    return C->isMinusOne();
  default:
    return false;
  }
}

bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
  if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
    tryExpandAtomicLoad(ResultingLoad);
    return true;
  }
  return false;
}
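// An idempotent RMW never changes memory: e.g.
//   %old = atomicrmw or ptr %p, i32 0 seq_cst
// only observes the location. A target implementing
// lowerIdempotentRMWIntoFencedLoad can therefore turn it into a suitably
// fenced atomic load, which is then run through the normal load expansion.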
Value *AtomicExpand::insertRMWCmpXchgLoop(
    IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder, SyncScope::ID SSID,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
    CreateCmpXchgInstFun CreateCmpXchg) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
  Loaded->addIncoming(InitLoaded, BB);

  Value *NewVal = PerformOp(Builder, Loaded);

  Value *NewLoaded = nullptr;
  Value *Success = nullptr;

  CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
                MemOpOrder == AtomicOrdering::Unordered
                    ? AtomicOrdering::Monotonic
                    : MemOpOrder,
                SSID, Success, NewLoaded);
  assert(Success && NewLoaded);

  Loaded->addIncoming(NewLoaded, LoopBB);

  Builder.CreateCondBr(Success, ExitBB, LoopBB);

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return NewLoaded;
}
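// The expansion produced above is, roughly:
//     %init_loaded = load iN, ptr %addr
//     br label %atomicrmw.start
//   atomicrmw.start:
//     %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %atomicrmw.start ]
//     %new = some_op iN %loaded, %incr
//     %pair = cmpxchg ptr %addr, iN %loaded, iN %new
//     %new_loaded = extractvalue { iN, i1 } %pair, 0
//     %success = extractvalue { iN, i1 } %pair, 1
//     br i1 %success, label %atomicrmw.end, label %atomicrmw.start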
bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
  unsigned ValueSize = getAtomicOpSize(CI);

  switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
  case TargetLoweringBase::AtomicExpansionKind::None:
    if (ValueSize < MinCASSize)
      return expandPartwordCmpXchg(CI);
    return false;
  case TargetLoweringBase::AtomicExpansionKind::LLSC:
    return expandAtomicCmpXchg(CI);
  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
    expandAtomicCmpXchgToMaskedIntrinsic(CI);
    return true;
  case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
    return lowerAtomicCmpXchgInst(CI);
  default:
    llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
  }
}
// Note: This function is exposed externally by AtomicExpandUtils.h.
bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
                                    CreateCmpXchgInstFun CreateCmpXchg) {
  ReplacementIRBuilder Builder(AI, AI->getModule()->getDataLayout());
  Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
      Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
      AI->getOrdering(), AI->getSyncScopeID(),
      [&](IRBuilderBase &Builder, Value *Loaded) {
        return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
                                   AI->getValOperand());
      },
      CreateCmpXchg);

  AI->replaceAllUsesWith(Loaded);
  AI->eraseFromParent();
  return true;
}
// In order to use one of the sized library calls such as
// __atomic_fetch_add_4, the alignment must be sufficient and the size must be
// one of the potentially-specialized sizes that actually exists in C on the
// target (otherwise the function wouldn't be defined).
static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
                                  const DataLayout &DL) {
  // TODO: "LargestSize" is an approximation for "largest type that you can
  // express in C". It seems to be the case that int128 is supported on all
  // 64-bit platforms, while otherwise only up to 64-bit integers are
  // supported.
  unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
  return Alignment >= Size &&
         (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
         Size <= LargestSize;
}
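// E.g. a 4-byte, 4-aligned atomic add qualifies for the sized call
//   i32 __atomic_fetch_add_4(ptr, i32, int ordering)
// while an under-aligned access must use the generic, size-parameterized
// __atomic_* entry points (or, for fetch-and-op calls that have no generic
// form, a cmpxchg loop over __atomic_compare_exchange; see
// expandAtomicRMWToLibcall below).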
void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_LOAD,   RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
      RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
  unsigned Size = getAtomicOpSize(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
      I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
  if (!expanded)
    report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Load");
}

void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_STORE,   RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
      RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
  unsigned Size = getAtomicOpSize(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
      nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
  if (!expanded)
    report_fatal_error("expandAtomicOpToLibcall shouldn't fail for Store");
}
void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
  static const RTLIB::Libcall Libcalls[6] = {
      RTLIB::ATOMIC_COMPARE_EXCHANGE,   RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
      RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
      RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
  unsigned Size = getAtomicOpSize(I);

  bool expanded = expandAtomicOpToLibcall(
      I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
      I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
      Libcalls);
  if (!expanded)
    report_fatal_error("expandAtomicOpToLibcall shouldn't fail for CAS");
}
static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
  static const RTLIB::Libcall LibcallsXchg[6] = {
      RTLIB::ATOMIC_EXCHANGE,   RTLIB::ATOMIC_EXCHANGE_1,
      RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
      RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
  static const RTLIB::Libcall LibcallsAdd[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_ADD_1,
      RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
      RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
  static const RTLIB::Libcall LibcallsSub[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_SUB_1,
      RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
      RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
  static const RTLIB::Libcall LibcallsAnd[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_AND_1,
      RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
      RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
  static const RTLIB::Libcall LibcallsOr[6] = {
      RTLIB::UNKNOWN_LIBCALL,   RTLIB::ATOMIC_FETCH_OR_1,
      RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
      RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
  static const RTLIB::Libcall LibcallsXor[6] = {
      RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_XOR_1,
      RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
      RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
  static const RTLIB::Libcall LibcallsNand[6] = {
      RTLIB::UNKNOWN_LIBCALL,     RTLIB::ATOMIC_FETCH_NAND_1,
      RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
      RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};

  switch (Op) {
  case AtomicRMWInst::Xchg:
    return ArrayRef(LibcallsXchg);
  case AtomicRMWInst::Add:
    return ArrayRef(LibcallsAdd);
  case AtomicRMWInst::Sub:
    return ArrayRef(LibcallsSub);
  case AtomicRMWInst::And:
    return ArrayRef(LibcallsAnd);
  case AtomicRMWInst::Or:
    return ArrayRef(LibcallsOr);
  case AtomicRMWInst::Xor:
    return ArrayRef(LibcallsXor);
  case AtomicRMWInst::Nand:
    return ArrayRef(LibcallsNand);
  default:
    // No atomic libcalls are available for min/max/umin/umax or the FP
    // operations.
    return {};
  }
}

void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
  ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());

  unsigned Size = getAtomicOpSize(I);

  bool Success = false;
  if (!Libcalls.empty())
    Success = expandAtomicOpToLibcall(
        I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
        nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);

  // The expansion failed: either there were no libcalls at all for the
  // operation (min/max), or there were only size-specialized libcalls
  // (add/sub/etc.) and we needed a generic. So, expand to a CAS libcall, via
  // a CAS loop, instead.
  if (!Success) {
    expandAtomicRMWToCmpXchg(
        I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
                  Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
                  SyncScope::ID SSID, Value *&Success, Value *&NewLoaded) {
          // Create the CAS instruction normally...
          AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
              Addr, Loaded, NewVal, Alignment, MemOpOrder,
              AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder),
              SSID);
          Success = Builder.CreateExtractValue(Pair, 1, "success");
          NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");

          // ...and then, expand the CAS into a libcall.
          expandAtomicCASToLibcall(Pair);
        });
  }
}
// Helper to emit the __atomic_* libcall for a given atomic operation,
// handling both the sized (__atomic_*_N) and generic (__atomic_*) variants.
bool AtomicExpand::expandAtomicOpToLibcall(
    Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
    Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
    AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
  assert(Libcalls.size() == 6);

  LLVMContext &Ctx = I->getContext();
  Module *M = I->getModule();
  const DataLayout &DL = M->getDataLayout();
  IRBuilder<> Builder(I);
  IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());

  bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
  Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);

  const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);

  ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
  assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
  Constant *OrderingVal =
      ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
  Constant *Ordering2Val = nullptr;
  if (CASExpected) {
    assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
    Ordering2Val =
        ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
  }
  bool HasResult = I->getType() != Type::getVoidTy(Ctx);

  RTLIB::Libcall RTLibType;
  if (UseSizedLibcall) {
    switch (Size) {
    case 1:
      RTLibType = Libcalls[1];
      break;
    case 2:
      RTLibType = Libcalls[2];
      break;
    case 4:
      RTLibType = Libcalls[3];
      break;
    case 8:
      RTLibType = Libcalls[4];
      break;
    case 16:
      RTLibType = Libcalls[5];
      break;
    }
  } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
    RTLibType = Libcalls[0];
  } else {
    // Can't use sized function, and there's no generic for this operation,
    // so give up.
    return false;
  }

  if (!TLI->getLibcallName(RTLibType)) {
    // This target does not implement the requested atomic libcall, so give up.
    return false;
  }
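  // The calls being built are, for N = 1,2,4,8,16 (sized variants):
  //   iN    __atomic_load_N(iN *ptr, int ordering)
  //   void  __atomic_store_N(iN *ptr, iN val, int ordering)
  //   iN    __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
  //   bool  __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
  //                                     int success_order, int failure_order)
  // and the generic variants:
  //   void  __atomic_load(size_t size, void *ptr, void *ret, int ordering)
  //   void  __atomic_store(size_t size, void *ptr, void *val, int ordering)
  //   void  __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
  //                           int ordering)
  //   bool  __atomic_compare_exchange(size_t size, void *ptr, void *expected,
  //                                   void *desired, int success_order,
  //                                   int failure_order)
  // The signature actually emitted depends on UseSizedLibcall, CASExpected,
  // ValueOperand, and HasResult, as built up below.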
  AllocaInst *AllocaCASExpected = nullptr;
  Value *AllocaCASExpected_i8 = nullptr;
  AllocaInst *AllocaValue = nullptr;
  Value *AllocaValue_i8 = nullptr;
  AllocaInst *AllocaResult = nullptr;
  Value *AllocaResult_i8 = nullptr;

  Type *ResultTy;
  SmallVector<Value *, 6> Args;
  AttributeList Attr;

  // 'size' argument.
  if (!UseSizedLibcall) {
    // Note, getIntPtrType is assumed equal to size_t.
    Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
  }

  // 'ptr' argument.
  Value *PtrVal =
      Builder.CreateBitOrPointerCast(PointerOperand, Type::getInt8PtrTy(Ctx));
  Args.push_back(PtrVal);

  // 'expected' argument, if present.
  if (CASExpected) {
    AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
    AllocaCASExpected->setAlignment(AllocaAlignment);
    AllocaCASExpected_i8 =
        Builder.CreateBitCast(AllocaCASExpected, Type::getInt8PtrTy(Ctx));
    Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64);
    Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
    Args.push_back(AllocaCASExpected_i8);
  }

  // 'val' argument ('desired' for cmpxchg), if present.
  if (ValueOperand) {
    if (UseSizedLibcall) {
      Value *IntValue =
          Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
      Args.push_back(IntValue);
    } else {
      AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
      AllocaValue->setAlignment(AllocaAlignment);
      AllocaValue_i8 =
          Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx));
      Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64);
      Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
      Args.push_back(AllocaValue_i8);
    }
  }

  // 'ret' argument.
  if (!CASExpected && HasResult && !UseSizedLibcall) {
    AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
    AllocaResult->setAlignment(AllocaAlignment);
    AllocaResult_i8 =
        Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx));
    Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64);
    Args.push_back(AllocaResult_i8);
  }

  // 'ordering' ('success_order' for cmpxchg) argument.
  Args.push_back(OrderingVal);

  // 'failure_order' argument, if present.
  if (Ordering2Val)
    Args.push_back(Ordering2Val);

  // Now, the return type.
  if (CASExpected) {
    ResultTy = Type::getInt1Ty(Ctx);
    Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
  } else if (HasResult && UseSizedLibcall)
    ResultTy = SizedIntTy;
  else
    ResultTy = Type::getVoidTy(Ctx);

  // Done with setting up arguments and return types, create the call:
  SmallVector<Type *, 6> ArgTys;
  for (Value *Arg : Args)
    ArgTys.push_back(Arg->getType());
  FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
  FunctionCallee LibcallFn =
      M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
  CallInst *Call = Builder.CreateCall(LibcallFn, Args);
  Call->setAttributes(Attr);
  Value *Result = Call;

  // And then, extract the results...
  if (ValueOperand && !UseSizedLibcall)
    Builder.CreateLifetimeEnd(AllocaValue_i8, SizeVal64);

  if (CASExpected) {
    // The final result from the CAS is {load of 'expected' alloca, bool
    // result from call}.
    Type *FinalResultTy = I->getType();
    Value *V = PoisonValue::get(FinalResultTy);
    Value *ExpectedOut = Builder.CreateAlignedLoad(
        CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
    Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64);
    V = Builder.CreateInsertValue(V, ExpectedOut, 0);
    V = Builder.CreateInsertValue(V, Result, 1);
    I->replaceAllUsesWith(V);
  } else if (HasResult) {
    Value *V;
    if (UseSizedLibcall)
      V = Builder.CreateBitOrPointerCast(Result, I->getType());
    else {
      V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
                                    AllocaAlignment);
      Builder.CreateLifetimeEnd(AllocaResult_i8, SizeVal64);
    }
    I->replaceAllUsesWith(V);
  }
  I->eraseFromParent();
  return true;
}