LCOV - code coverage report
Current view: top level - lib/CodeGen - AtomicExpandPass.cpp (source / functions)
Test: llvm-toolchain.info
Date: 2018-10-20 13:21:21
                 Hit    Total    Coverage
Lines:           301      609      49.4 %
Functions:        35       48      72.9 %
Legend: Lines: hit | not hit

          Line data    Source code
       1             : //===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
       2             : //
       3             : //                     The LLVM Compiler Infrastructure
       4             : //
       5             : // This file is distributed under the University of Illinois Open Source
       6             : // License. See LICENSE.TXT for details.
       7             : //
       8             : //===----------------------------------------------------------------------===//
       9             : //
      10             : // This file contains a pass (at IR level) to replace atomic instructions with
       11             : // __atomic_* library calls, or target-specific instructions which implement
       12             : // the same semantics in a way that better fits the target backend.  This can
      13             : // include the use of (intrinsic-based) load-linked/store-conditional loops,
      14             : // AtomicCmpXchg, or type coercions.
      15             : //
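                      : // Illustrative sketch (an editor's example, not taken from the LLVM sources
                      : // or tests): on a target that requests CmpXChg expansion, an atomicrmw such
                      : // as
                      : //
                      : //     %old = atomicrmw add i32* %ptr, i32 1 seq_cst
                      : //
                      : // is rewritten into a compare-exchange retry loop roughly of the form:
                      : //
                      : //   atomicrmw.start:
                      : //     %loaded = phi i32 [ %init, %entry ], [ %newloaded, %atomicrmw.start ]
                      : //     %new = add i32 %loaded, 1
                      : //     %pair = cmpxchg i32* %ptr, i32 %loaded, i32 %new seq_cst seq_cst
                      : //     %newloaded = extractvalue { i32, i1 } %pair, 0
                      : //     %success = extractvalue { i32, i1 } %pair, 1
                      : //     br i1 %success, label %atomicrmw.end, label %atomicrmw.start
                      : //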
      16             : //===----------------------------------------------------------------------===//
      17             : 
      18             : #include "llvm/ADT/ArrayRef.h"
      19             : #include "llvm/ADT/STLExtras.h"
      20             : #include "llvm/ADT/SmallVector.h"
      21             : #include "llvm/CodeGen/AtomicExpandUtils.h"
      22             : #include "llvm/CodeGen/RuntimeLibcalls.h"
      23             : #include "llvm/CodeGen/TargetLowering.h"
      24             : #include "llvm/CodeGen/TargetPassConfig.h"
      25             : #include "llvm/CodeGen/TargetSubtargetInfo.h"
      26             : #include "llvm/CodeGen/ValueTypes.h"
      27             : #include "llvm/IR/Attributes.h"
      28             : #include "llvm/IR/BasicBlock.h"
      29             : #include "llvm/IR/Constant.h"
      30             : #include "llvm/IR/Constants.h"
      31             : #include "llvm/IR/DataLayout.h"
      32             : #include "llvm/IR/DerivedTypes.h"
      33             : #include "llvm/IR/Function.h"
      34             : #include "llvm/IR/IRBuilder.h"
      35             : #include "llvm/IR/InstIterator.h"
      36             : #include "llvm/IR/Instruction.h"
      37             : #include "llvm/IR/Instructions.h"
      38             : #include "llvm/IR/Module.h"
      39             : #include "llvm/IR/Type.h"
      40             : #include "llvm/IR/User.h"
      41             : #include "llvm/IR/Value.h"
      42             : #include "llvm/Pass.h"
      43             : #include "llvm/Support/AtomicOrdering.h"
      44             : #include "llvm/Support/Casting.h"
      45             : #include "llvm/Support/Debug.h"
      46             : #include "llvm/Support/ErrorHandling.h"
      47             : #include "llvm/Support/raw_ostream.h"
      48             : #include "llvm/Target/TargetMachine.h"
      49             : #include <cassert>
      50             : #include <cstdint>
      51             : #include <iterator>
      52             : 
      53             : using namespace llvm;
      54             : 
      55             : #define DEBUG_TYPE "atomic-expand"
      56             : 
      57             : namespace {
      58             : 
      59             :   class AtomicExpand: public FunctionPass {
      60             :     const TargetLowering *TLI = nullptr;
      61             : 
      62             :   public:
      63             :     static char ID; // Pass identification, replacement for typeid
      64             : 
      65       26059 :     AtomicExpand() : FunctionPass(ID) {
      66       26059 :       initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
      67       26059 :     }
      68             : 
      69             :     bool runOnFunction(Function &F) override;
      70             : 
      71             :   private:
      72             :     bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
      73             :     IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
      74             :     LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
      75             :     bool tryExpandAtomicLoad(LoadInst *LI);
      76             :     bool expandAtomicLoadToLL(LoadInst *LI);
      77             :     bool expandAtomicLoadToCmpXchg(LoadInst *LI);
      78             :     StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
      79             :     bool expandAtomicStore(StoreInst *SI);
      80             :     bool tryExpandAtomicRMW(AtomicRMWInst *AI);
      81             :     Value *
      82             :     insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
      83             :                       AtomicOrdering MemOpOrder,
      84             :                       function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
      85             :     void expandAtomicOpToLLSC(
      86             :         Instruction *I, Type *ResultTy, Value *Addr, AtomicOrdering MemOpOrder,
      87             :         function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
      88             :     void expandPartwordAtomicRMW(
      89             :         AtomicRMWInst *I,
      90             :         TargetLoweringBase::AtomicExpansionKind ExpansionKind);
      91             :     AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
      92             :     void expandPartwordCmpXchg(AtomicCmpXchgInst *I);
      93             :     void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
      94             : 
      95             :     AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
      96             :     static Value *insertRMWCmpXchgLoop(
      97             :         IRBuilder<> &Builder, Type *ResultType, Value *Addr,
      98             :         AtomicOrdering MemOpOrder,
      99             :         function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
     100             :         CreateCmpXchgInstFun CreateCmpXchg);
     101             :     bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
     102             : 
     103             :     bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
     104             :     bool isIdempotentRMW(AtomicRMWInst *RMWI);
     105             :     bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
     106             : 
     107             :     bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, unsigned Align,
     108             :                                  Value *PointerOperand, Value *ValueOperand,
     109             :                                  Value *CASExpected, AtomicOrdering Ordering,
     110             :                                  AtomicOrdering Ordering2,
     111             :                                  ArrayRef<RTLIB::Libcall> Libcalls);
     112             :     void expandAtomicLoadToLibcall(LoadInst *LI);
     113             :     void expandAtomicStoreToLibcall(StoreInst *LI);
     114             :     void expandAtomicRMWToLibcall(AtomicRMWInst *I);
     115             :     void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
     116             : 
     117             :     friend bool
     118             :     llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
     119             :                                    CreateCmpXchgInstFun CreateCmpXchg);
     120             :   };
     121             : 
     122             : } // end anonymous namespace
     123             : 
     124             : char AtomicExpand::ID = 0;
     125             : 
     126             : char &llvm::AtomicExpandID = AtomicExpand::ID;
     127             : 
     128      129209 : INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions",
     129             :                 false, false)
     130             : 
     131       26050 : FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); }
     132             : 
     133             : // Helper functions to retrieve the size of atomic instructions.
     134        6983 : static unsigned getAtomicOpSize(LoadInst *LI) {
     135        6983 :   const DataLayout &DL = LI->getModule()->getDataLayout();
     136        6983 :   return DL.getTypeStoreSize(LI->getType());
     137             : }
     138             : 
     139        1690 : static unsigned getAtomicOpSize(StoreInst *SI) {
     140        1690 :   const DataLayout &DL = SI->getModule()->getDataLayout();
     141        1690 :   return DL.getTypeStoreSize(SI->getValueOperand()->getType());
     142             : }
     143             : 
     144       22787 : static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
     145       22787 :   const DataLayout &DL = RMWI->getModule()->getDataLayout();
     146       22787 :   return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
     147             : }
     148             : 
     149        9103 : static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
     150        9103 :   const DataLayout &DL = CASI->getModule()->getDataLayout();
     151        9103 :   return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
     152             : }
     153             : 
     154             : // Helper functions to retrieve the alignment of atomic instructions.
     155             : static unsigned getAtomicOpAlign(LoadInst *LI) {
     156             :   unsigned Align = LI->getAlignment();
     157             :   // In the future, if this IR restriction is relaxed, we should
     158             :   // return DataLayout::getABITypeAlignment when there's no align
     159             :   // value.
     160             :   assert(Align != 0 && "An atomic LoadInst always has an explicit alignment");
     161             :   return Align;
     162             : }
     163             : 
     164             : static unsigned getAtomicOpAlign(StoreInst *SI) {
     165             :   unsigned Align = SI->getAlignment();
     166             :   // In the future, if this IR restriction is relaxed, we should
     167             :   // return DataLayout::getABITypeAlignment when there's no align
     168             :   // value.
     169             :   assert(Align != 0 && "An atomic StoreInst always has an explicit alignment");
     170             :   return Align;
     171             : }
     172             : 
     173       10339 : static unsigned getAtomicOpAlign(AtomicRMWInst *RMWI) {
     174             :   // TODO(PR27168): This instruction has no alignment attribute, but unlike the
     175             :   // default alignment for load/store, the default here is to assume
     176             :   // it has NATURAL alignment, not DataLayout-specified alignment.
     177       10339 :   const DataLayout &DL = RMWI->getModule()->getDataLayout();
     178       10339 :   return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
     179             : }
     180             : 
     181        4555 : static unsigned getAtomicOpAlign(AtomicCmpXchgInst *CASI) {
     182             :   // TODO(PR27168): same comment as above.
     183        4555 :   const DataLayout &DL = CASI->getModule()->getDataLayout();
     184        4555 :   return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
     185             : }
     186             : 
     187             : // Determine if a particular atomic operation has a supported size,
     188             : // and is of appropriate alignment, to be passed through for target
      189             : // lowering (versus turning it into an __atomic libcall).
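                      : // For example (illustrative numbers): with getMaxAtomicSizeInBitsSupported()
                      : // returning 64, an 8-byte cmpxchg with 8-byte alignment passes this check,
                      : // while a 16-byte or under-aligned one is routed to an __atomic_* libcall.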
     190             : template <typename Inst>
     191       23550 : static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
     192       23550 :   unsigned Size = getAtomicOpSize(I);
     193       14886 :   unsigned Align = getAtomicOpAlign(I);
     194       23550 :   return Align >= Size && Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
     195             : }
     196        4551 : 
     197        4551 : bool AtomicExpand::runOnFunction(Function &F) {
     198        4551 :   auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
     199        4551 :   if (!TPC)
     200             :     return false;
     201       10335 : 
     202       10335 :   auto &TM = TPC->getTM<TargetMachine>();
     203       10335 :   if (!TM.getSubtargetImpl(F)->enableAtomicExpand())
     204       10335 :     return false;
     205             :   TLI = TM.getSubtargetImpl(F)->getTargetLowering();
     206        1685 : 
     207        1685 :   SmallVector<Instruction *, 1> AtomicInsts;
     208             : 
     209        1685 :   // Changing control-flow while iterating through it is a bad idea, so gather a
     210             :   // list of all atomic instructions before we start.
     211        6979 :   for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
     212        6979 :     Instruction *I = &*II;
     213             :     if (I->isAtomic() && !isa<FenceInst>(I))
     214        6979 :       AtomicInsts.push_back(I);
     215             :   }
     216             : 
     217      396932 :   bool MadeChange = false;
     218      396932 :   for (auto I : AtomicInsts) {
     219      396932 :     auto LI = dyn_cast<LoadInst>(I);
     220             :     auto SI = dyn_cast<StoreInst>(I);
     221             :     auto RMWI = dyn_cast<AtomicRMWInst>(I);
     222      396932 :     auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
     223      396932 :     assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");
     224             : 
     225      392178 :     // If the Size/Alignment is not supported, replace with a libcall.
     226             :     if (LI) {
     227             :       if (!atomicSizeSupported(TLI, LI)) {
     228             :         expandAtomicLoadToLibcall(LI);
     229             :         MadeChange = true;
     230             :         continue;
     231    37228938 :       }
     232    37228938 :     } else if (SI) {
     233    37228938 :       if (!atomicSizeSupported(TLI, SI)) {
     234       23550 :         expandAtomicStoreToLibcall(SI);
     235             :         MadeChange = true;
     236             :         continue;
     237             :       }
     238      415728 :     } else if (RMWI) {
     239             :       if (!atomicSizeSupported(TLI, RMWI)) {
     240             :         expandAtomicRMWToLibcall(RMWI);
     241             :         MadeChange = true;
     242             :         continue;
     243             :       }
     244             :     } else if (CASI) {
     245             :       if (!atomicSizeSupported(TLI, CASI)) {
     246       23550 :         expandAtomicCASToLibcall(CASI);
     247        6979 :         MadeChange = true;
     248           4 :         continue;
     249             :       }
     250           4 :     }
     251             : 
     252       16571 :     if (TLI->shouldInsertFencesForAtomic(I)) {
     253        1685 :       auto FenceOrdering = AtomicOrdering::Monotonic;
     254           5 :       if (LI && isAcquireOrStronger(LI->getOrdering())) {
     255             :         FenceOrdering = LI->getOrdering();
     256           5 :         LI->setOrdering(AtomicOrdering::Monotonic);
     257             :       } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
     258       14886 :         FenceOrdering = SI->getOrdering();
     259       10335 :         SI->setOrdering(AtomicOrdering::Monotonic);
     260           4 :       } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
     261             :                           isAcquireOrStronger(RMWI->getOrdering()))) {
     262           4 :         FenceOrdering = RMWI->getOrdering();
     263             :         RMWI->setOrdering(AtomicOrdering::Monotonic);
     264        4551 :       } else if (CASI &&
     265        4551 :                  TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
     266           3 :                      TargetLoweringBase::AtomicExpansionKind::None &&
     267             :                  (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
     268           3 :                   isAcquireOrStronger(CASI->getSuccessOrdering()))) {
     269             :         // If a compare and swap is lowered to LL/SC, we can do smarter fence
     270             :         // insertion, with a stronger one on the success path than on the
     271             :         // failure path. As a result, fence insertion is directly done by
     272       23534 :         // expandAtomicCmpXchg in that case.
     273             :         FenceOrdering = CASI->getSuccessOrdering();
     274        1444 :         CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
     275             :         CASI->setFailureOrdering(AtomicOrdering::Monotonic);
     276             :       }
     277        1344 : 
     278             :       if (FenceOrdering != AtomicOrdering::Monotonic) {
     279             :         MadeChange |= bracketInstWithFences(I, FenceOrdering);
     280        1255 :       }
     281             :     }
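                      :     // A sketch of the effect (assuming a fence-inserting target such as
                      :     // pre-v8 ARM): an acquire load is downgraded to monotonic above, and
                      :     // bracketInstWithFences then emits the required fence (e.g. a dmb)
                      :     // after it.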
     282             : 
     283             :     if (LI) {
     284         265 :       if (LI->getType()->isFloatingPointTy()) {
     285         265 :         // TODO: add a TLI hook to control this so that each target can
     286        1041 :         // convert to lowering the original type one at a time.
     287         118 :         LI = convertAtomicLoadToIntegerType(LI);
     288             :         assert(LI->getType()->isIntegerTy() && "invariant broken");
     289             :         MadeChange = true;
     290             :       }
     291             : 
     292             :       MadeChange |= tryExpandAtomicLoad(LI);
     293             :     } else if (SI) {
     294             :       if (SI->getValueOperand()->getType()->isFloatingPointTy()) {
     295             :         // TODO: add a TLI hook to control this so that each target can
     296             :         // convert to lowering the original type one at a time.
     297             :         SI = convertAtomicStoreToIntegerType(SI);
     298         780 :         assert(SI->getValueOperand()->getType()->isIntegerTy() &&
     299         780 :                "invariant broken");
     300             :         MadeChange = true;
     301             :       }
     302             : 
     303       23534 :       if (TLI->shouldExpandAtomicStoreInIR(SI))
     304        6975 :         MadeChange |= expandAtomicStore(SI);
     305             :     } else if (RMWI) {
     306             :       // There are two different ways of expanding RMW instructions:
     307          10 :       // - into a load if it is idempotent
     308             :       // - into a Cmpxchg/LL-SC loop otherwise
     309             :       // we try them in that order.
     310             : 
     311             :       if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
     312        6975 :         MadeChange = true;
     313       16559 :       } else {
     314        1680 :         unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
     315             :         unsigned ValueSize = getAtomicOpSize(RMWI);
     316             :         AtomicRMWInst::BinOp Op = RMWI->getOperation();
     317          10 :         if (ValueSize < MinCASSize &&
     318             :             (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
     319             :              Op == AtomicRMWInst::And)) {
     320             :           RMWI = widenPartwordAtomicRMW(RMWI);
     321             :           MadeChange = true;
     322             :         }
     323        1680 : 
     324          44 :         MadeChange |= tryExpandAtomicRMW(RMWI);
     325       14879 :       }
     326             :     } else if (CASI) {
     327             :       // TODO: when we're ready to make the change at the IR level, we can
     328             :       // extend convertCmpXchgToInteger for floating point too.
     329             :       assert(!CASI->getCompareOperand()->getType()->isFloatingPointTy() &&
     330             :              "unimplemented - floating point not legal at IR level");
     331       10331 :       if (CASI->getCompareOperand()->getType()->isPointerTy() ) {
     332             :         // TODO: add a TLI hook to control this so that each target can
     333             :         // convert to lowering the original type one at a time.
     334       10311 :         CASI = convertCmpXchgToIntegerType(CASI);
     335       10311 :         assert(CASI->getCompareOperand()->getType()->isIntegerTy() &&
     336             :                "invariant broken");
     337       10311 :         MadeChange = true;
     338          18 :       }
     339           9 : 
     340           3 :       MadeChange |= tryExpandAtomicCmpXchg(CASI);
     341             :     }
     342             :   }
     343             :   return MadeChange;
     344       10311 : }
     345             : 
     346        4548 : bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) {
     347             :   IRBuilder<> Builder(I);
     348             : 
     349             :   auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
     350             : 
     351        9096 :   auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
     352             :   // We have a guard here because not every atomic operation generates a
     353             :   // trailing fence.
     354          10 :   if (TrailingFence)
     355             :     TrailingFence->moveAfter(I);
     356             : 
     357             :   return (LeadingFence || TrailingFence);
     358             : }
     359             : 
     360        4548 : /// Get the iX type with the same bitwidth as T.
     361             : IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
     362             :                                                        const DataLayout &DL) {
     363             :   EVT VT = TLI->getValueType(DL, T);
     364             :   unsigned BitWidth = VT.getStoreSizeInBits();
     365             :   assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
     366           0 :   return IntegerType::get(T->getContext(), BitWidth);
     367           0 : }
     368             : 
     369           0 : /// Convert an atomic load of a non-integral type to an integer load of the
     370             : /// equivalent bitwidth.  See the function comment on
     371           0 : /// convertAtomicStoreToIntegerType for background.
     372             : LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
     373             :   auto *M = LI->getModule();
     374           0 :   Type *NewTy = getCorrespondingIntegerType(LI->getType(),
     375           0 :                                             M->getDataLayout());
     376             : 
     377           0 :   IRBuilder<> Builder(LI);
     378             : 
     379             :   Value *Addr = LI->getPointerOperand();
     380             :   Type *PT = PointerType::get(NewTy,
     381           0 :                               Addr->getType()->getPointerAddressSpace());
     382             :   Value *NewAddr = Builder.CreateBitCast(Addr, PT);
     383           0 : 
     384             :   auto *NewLI = Builder.CreateLoad(NewAddr);
     385             :   NewLI->setAlignment(LI->getAlignment());
     386           0 :   NewLI->setVolatile(LI->isVolatile());
     387             :   NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
     388             :   LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
     389             : 
     390             :   Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
     391             :   LI->replaceAllUsesWith(NewVal);
     392          10 :   LI->eraseFromParent();
     393          10 :   return NewLI;
     394          10 : }
     395             : 
     396             : bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
     397          10 :   switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
     398             :   case TargetLoweringBase::AtomicExpansionKind::None:
     399             :     return false;
     400          10 :   case TargetLoweringBase::AtomicExpansionKind::LLSC:
     401             :     expandAtomicOpToLLSC(
     402          10 :         LI, LI->getType(), LI->getPointerOperand(), LI->getOrdering(),
     403             :         [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; });
     404          10 :     return true;
     405          10 :   case TargetLoweringBase::AtomicExpansionKind::LLOnly:
     406             :     return expandAtomicLoadToLL(LI);
     407          10 :   case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
     408             :     return expandAtomicLoadToCmpXchg(LI);
     409             :   default:
     410          10 :     llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
     411          10 :   }
     412          10 : }
     413          10 : 
     414             : bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
     415             :   IRBuilder<> Builder(LI);
     416        6995 : 
     417        6995 :   // On some architectures, load-linked instructions are atomic for larger
     418             :   // sizes than normal loads. For example, the only 64-bit load guaranteed
     419             :   // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
     420             :   Value *Val =
     421           2 :       TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering());
     422             :   TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
     423             : 
     424           2 :   LI->replaceAllUsesWith(Val);
     425          14 :   LI->eraseFromParent();
     426          14 : 
     427          22 :   return true;
     428          22 : }
     429           0 : 
     430           0 : bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
     431             :   IRBuilder<> Builder(LI);
     432             :   AtomicOrdering Order = LI->getOrdering();
     433             :   Value *Addr = LI->getPointerOperand();
     434           0 :   Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
     435           0 :   Constant *DummyVal = Constant::getNullValue(Ty);
     436             : 
     437             :   Value *Pair = Builder.CreateAtomicCmpXchg(
     438             :       Addr, DummyVal, DummyVal, Order,
     439             :       AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
     440             :   Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
     441           0 : 
     442           0 :   LI->replaceAllUsesWith(Loaded);
     443             :   LI->eraseFromParent();
     444           0 : 
     445           0 :   return true;
     446             : }
     447           0 : 
     448             : /// Convert an atomic store of a non-integral type to an integer store of the
     449             : /// equivalent bitwidth.  We used to not support floating point or vector
     450           0 : /// atomics in the IR at all.  The backends learned to deal with the bitcast
      451             : /// idiom because that was the only way of expressing the notion of an atomic
     452             : /// float or vector store.  The long term plan is to teach each backend to
     453             : /// instruction select from the original atomic store, but as a migration
     454           0 : /// mechanism, we convert back to the old format which the backends understand.
     455           0 : /// Each backend will need individual work to recognize the new format.
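                      : ///
                      : /// For example (illustrative only), a store such as
                      : ///
                      : ///     store atomic float %f, float* %p seq_cst, align 4
                      : ///
                      : /// becomes
                      : ///
                      : ///     %1 = bitcast float %f to i32
                      : ///     %2 = bitcast float* %p to i32*
                      : ///     store atomic i32 %1, i32* %2 seq_cst, align 4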
     456             : StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
     457           0 :   IRBuilder<> Builder(SI);
     458             :   auto *M = SI->getModule();
     459             :   Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
     460           0 :                                             M->getDataLayout());
     461             :   Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
     462           0 : 
     463           0 :   Value *Addr = SI->getPointerOperand();
     464             :   Type *PT = PointerType::get(NewTy,
     465           0 :                               Addr->getType()->getPointerAddressSpace());
     466             :   Value *NewAddr = Builder.CreateBitCast(Addr, PT);
     467             : 
     468             :   StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
     469             :   NewSI->setAlignment(SI->getAlignment());
     470             :   NewSI->setVolatile(SI->isVolatile());
     471             :   NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
     472             :   LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
     473             :   SI->eraseFromParent();
     474             :   return NewSI;
     475             : }
     476          10 : 
     477          10 : bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
     478          10 :   // This function is only called on atomic stores that are too large to be
      479             :   // atomic if implemented as a native store. So we replace them with an
      480             :   // atomic swap, which can be implemented for example as a ldrex/strex on ARM
     481          10 :   // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
     482             :   // It is the responsibility of the target to only signal expansion via
     483             :   // shouldExpandAtomicRMW in cases where this is required and possible.
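                      :   // Sketch (assuming a 32-bit ARM target): a 64-bit
                      :   //     store atomic i64 %v, i64* %p seq_cst, align 8
                      :   // becomes
                      :   //     atomicrmw xchg i64* %p, i64 %v seq_cst
                      :   // (its result unused), which tryExpandAtomicRMW then lowers, e.g. to an
                      :   // ldrexd/strexd loop.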
     484          10 :   IRBuilder<> Builder(SI);
     485             :   AtomicRMWInst *AI =
     486          10 :       Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
     487             :                               SI->getValueOperand(), SI->getOrdering());
     488          10 :   SI->eraseFromParent();
     489          10 : 
     490             :   // Now we have an appropriate swap instruction, lower it as usual.
     491          10 :   return tryExpandAtomicRMW(AI);
     492             : }
     493          10 : 
     494          10 : static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
     495             :                                  Value *Loaded, Value *NewVal,
     496             :                                  AtomicOrdering MemOpOrder,
     497          44 :                                  Value *&Success, Value *&NewLoaded) {
     498             :   Value* Pair = Builder.CreateAtomicCmpXchg(
     499             :       Addr, Loaded, NewVal, MemOpOrder,
     500             :       AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
     501             :   Success = Builder.CreateExtractValue(Pair, 1, "success");
     502             :   NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
     503             : }
     504          44 : 
     505             : /// Emit IR to implement the given atomicrmw operation on values in registers,
     506          44 : /// returning the new value.
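                      : ///
                      : /// For example, IR has no nand or max instruction, so Nand is emitted as
                      : /// not(and(Loaded, Inc)) and the min/max variants as an icmp plus select.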
     507             : static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
     508          44 :                               Value *Loaded, Value *Inc) {
     509             :   Value *NewVal;
     510             :   switch (Op) {
     511          44 :   case AtomicRMWInst::Xchg:
     512             :     return Inc;
     513             :   case AtomicRMWInst::Add:
     514        1655 :     return Builder.CreateAdd(Loaded, Inc, "new");
     515             :   case AtomicRMWInst::Sub:
     516             :     return Builder.CreateSub(Loaded, Inc, "new");
     517             :   case AtomicRMWInst::And:
     518        1655 :     return Builder.CreateAnd(Loaded, Inc, "new");
     519             :   case AtomicRMWInst::Nand:
     520             :     return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
     521        3310 :   case AtomicRMWInst::Or:
     522        3310 :     return Builder.CreateOr(Loaded, Inc, "new");
     523        1655 :   case AtomicRMWInst::Xor:
     524             :     return Builder.CreateXor(Loaded, Inc, "new");
     525             :   case AtomicRMWInst::Max:
     526             :     NewVal = Builder.CreateICmpSGT(Loaded, Inc);
     527        2136 :     return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
     528             :   case AtomicRMWInst::Min:
     529             :     NewVal = Builder.CreateICmpSLE(Loaded, Inc);
     530        2136 :     return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
     531             :   case AtomicRMWInst::UMax:
     532             :     NewVal = Builder.CreateICmpUGT(Loaded, Inc);
     533             :     return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
     534          90 :   case AtomicRMWInst::UMin:
     535             :     NewVal = Builder.CreateICmpULE(Loaded, Inc);
     536          77 :     return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
     537             :   default:
     538         506 :     llvm_unreachable("Unknown atomic op");
     539             :   }
     540         126 : }
     541             : 
     542         508 : bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
     543             :   switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
     544         504 :   case TargetLoweringBase::AtomicExpansionKind::None:
     545             :     return false;
     546          55 :   case TargetLoweringBase::AtomicExpansionKind::LLSC: {
     547          55 :     unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
     548             :     unsigned ValueSize = getAtomicOpSize(AI);
     549          63 :     if (ValueSize < MinCASSize) {
     550          63 :       llvm_unreachable(
     551             :           "MinCmpXchgSizeInBits not yet supported for LL/SC architectures.");
     552          62 :     } else {
     553          62 :       auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) {
     554             :         return performAtomicOp(AI->getOperation(), Builder, Loaded,
     555          63 :                                AI->getValOperand());
     556          63 :       };
     557           0 :       expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
     558           0 :                            AI->getOrdering(), PerformOp);
     559             :     }
     560             :     return true;
     561             :   }
     562       10355 :   case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
     563       10355 :     unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
     564             :     unsigned ValueSize = getAtomicOpSize(AI);
     565             :     if (ValueSize < MinCASSize) {
     566         482 :       expandPartwordAtomicRMW(AI,
     567         482 :                               TargetLoweringBase::AtomicExpansionKind::CmpXChg);
     568         482 :     } else {
     569         482 :       expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
     570           0 :     }
     571             :     return true;
     572             :   }
     573             :   case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
     574             :     expandAtomicRMWToMaskedIntrinsic(AI);
     575             :     return true;
     576         482 :   }
     577         482 :   default:
     578             :     llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
     579             :   }
     580         482 : }
     581             : 
     582        1655 : namespace {
     583        1655 : 
     584        1655 : /// Result values from createMaskInstrs helper.
     585        1655 : struct PartwordMaskValues {
     586           6 :   Type *WordType;
     587             :   Type *ValueType;
     588             :   Value *AlignedAddr;
     589        1649 :   Value *ShiftAmt;
     590             :   Value *Mask;
     591             :   Value *Inv_Mask;
     592             : };
     593           0 : 
     594           0 : } // end anonymous namespace
     595           0 : 
     596             : /// This is a helper function which builds instructions to provide
     597           0 : /// values necessary for partword atomic operations. It takes an
     598           0 : /// incoming address, Addr, and ValueType, and constructs the address,
     599             : /// shift-amounts and masks needed to work with a larger value of size
     600             : /// WordSize.
     601             : ///
     602             : /// AlignedAddr: Addr rounded down to a multiple of WordSize
     603             : ///
     604             : /// ShiftAmt: Number of bits to right-shift a WordSize value loaded
      605             : ///           from AlignedAddr for it to have the same value as if
     606             : ///           ValueType was loaded from Addr.
     607             : ///
      608             : /// Mask: Value to mask with the value loaded from AlignedAddr to
     609             : ///       include only the part that would've been loaded from Addr.
     610             : ///
     611             : /// Inv_Mask: The inverse of Mask.
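                      : ///
                      : /// Worked example (illustrative, assuming a little-endian target and
                      : /// WordSize == 4): for an i8 whose address has low bits 0b11,
                      : /// AlignedAddr = Addr & ~3, ShiftAmt = 3 * 8 = 24,
                      : /// Mask = 0xff << 24 = 0xff000000, and Inv_Mask = 0x00ffffff.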
     612             : static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
     613             :                                            Type *ValueType, Value *Addr,
     614             :                                            unsigned WordSize) {
     615             :   PartwordMaskValues Ret;
     616             : 
     617             :   BasicBlock *BB = I->getParent();
     618             :   Function *F = BB->getParent();
     619             :   Module *M = I->getModule();
     620             : 
     621             :   LLVMContext &Ctx = F->getContext();
     622             :   const DataLayout &DL = M->getDataLayout();
     623             : 
     624             :   unsigned ValueSize = DL.getTypeStoreSize(ValueType);
     625             : 
     626             :   assert(ValueSize < WordSize);
     627             : 
     628             :   Ret.ValueType = ValueType;
     629             :   Ret.WordType = Type::getIntNTy(Ctx, WordSize * 8);
     630             : 
     631             :   Type *WordPtrType =
     632          13 :       Ret.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace());
     633             : 
     634             :   Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx));
     635             :   Ret.AlignedAddr = Builder.CreateIntToPtr(
     636             :       Builder.CreateAnd(AddrInt, ~(uint64_t)(WordSize - 1)), WordPtrType,
     637          13 :       "AlignedAddr");
     638          13 : 
     639             :   Value *PtrLSB = Builder.CreateAnd(AddrInt, WordSize - 1, "PtrLSB");
     640             :   if (DL.isLittleEndian()) {
     641          13 :     // turn bytes into bits
     642          13 :     Ret.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
     643             :   } else {
     644          13 :     // turn bytes into bits, and count from the other side.
     645             :     Ret.ShiftAmt =
     646             :         Builder.CreateShl(Builder.CreateXor(PtrLSB, WordSize - ValueSize), 3);
     647             :   }
     648          13 : 
     649          13 :   Ret.ShiftAmt = Builder.CreateTrunc(Ret.ShiftAmt, Ret.WordType, "ShiftAmt");
     650             :   Ret.Mask = Builder.CreateShl(
     651             :       ConstantInt::get(Ret.WordType, (1 << ValueSize * 8) - 1), Ret.ShiftAmt,
     652          13 :       "Mask");
     653             :   Ret.Inv_Mask = Builder.CreateNot(Ret.Mask, "Inv_Mask");
     654          26 : 
     655          13 :   return Ret;
     656          13 : }
     657             : 
     658             : /// Emit IR to implement a masked version of a given atomicrmw
     659          13 : /// operation. (That is, only the bits under the Mask should be
     660          13 : /// affected by the operation)
     661             : static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
     662           0 :                                     IRBuilder<> &Builder, Value *Loaded,
     663             :                                     Value *Shifted_Inc, Value *Inc,
     664             :                                     const PartwordMaskValues &PMV) {
     665          13 :   // TODO: update to use
     666          13 :   // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
     667             :   // to merge bits from two values without requiring PMV.Inv_Mask.
     668             :   switch (Op) {
     669          13 :   case AtomicRMWInst::Xchg: {
     670          13 :     Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
     671          13 :     Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
     672             :     return FinalVal;
     673          13 :   }
     674             :   case AtomicRMWInst::Or:
     675          13 :   case AtomicRMWInst::Xor:
     676             :   case AtomicRMWInst::And:
     677             :     llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
     678             :   case AtomicRMWInst::Add:
     679             :   case AtomicRMWInst::Sub:
     680             :   case AtomicRMWInst::Nand: {
     681           6 :     // The other arithmetic ops need to be masked into place.
     682             :     Value *NewVal = performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
     683             :     Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
     684             :     Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
     685             :     Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
     686             :     return FinalVal;
     687             :   }
     688             :   case AtomicRMWInst::Max:
     689             :   case AtomicRMWInst::Min:
     690           2 :   case AtomicRMWInst::UMax:
     691           2 :   case AtomicRMWInst::UMin: {
     692           2 :     // Finally, comparison ops will operate on the full value, so
     693             :     // truncate down to the original size, and expand out again after
     694             :     // doing the operation.
     695             :     Value *Loaded_Shiftdown = Builder.CreateTrunc(
     696             :         Builder.CreateLShr(Loaded, PMV.ShiftAmt), PMV.ValueType);
     697             :     Value *NewVal = performAtomicOp(Op, Builder, Loaded_Shiftdown, Inc);
     698           3 :     Value *NewVal_Shiftup = Builder.CreateShl(
     699             :         Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
     700             :     Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
     701             :     Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shiftup);
     702           3 :     return FinalVal;
     703           3 :   }
     704           3 :   default:
     705           3 :     llvm_unreachable("Unknown atomic op");
     706           3 :   }
     707             : }
     708             : 
     709             : /// Expand a sub-word atomicrmw operation into an appropriate
     710             : /// word-sized operation.
     711             : ///
     712             : /// It will create an LL/SC or cmpxchg loop, as appropriate, the same
     713             : /// way as a typical atomicrmw expansion. The only difference here is
     714             : /// that the operation inside of the loop must operate only upon a
     715           2 : /// part of the value.
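                      : ///
                      : /// Sketch (illustrative): an i16 'atomicrmw add' on a target with a 32-bit
                      : /// cmpxchg becomes a cmpxchg loop over the containing i32 word; inside the
                      : /// loop the pre-shifted i16 addend is added, the result is masked back into
                      : /// the untouched bytes, and the old value is shifted and truncated back to
                      : /// i16 for the users of the original instruction.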
     716           2 : void AtomicExpand::expandPartwordAtomicRMW(
     717           1 :     AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
     718           1 :   assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg);
     719           2 : 
     720           1 :   AtomicOrdering MemOpOrder = AI->getOrdering();
     721           1 : 
     722           1 :   IRBuilder<> Builder(AI);
     723             : 
     724           0 :   PartwordMaskValues PMV =
     725           0 :       createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
     726             :                        TLI->getMinCmpXchgSizeInBits() / 8);
     727             : 
     728             :   Value *ValOperand_Shifted =
     729             :       Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
     730             :                         PMV.ShiftAmt, "ValOperand_Shifted");
     731             : 
     732             :   auto PerformPartwordOp = [&](IRBuilder<> &Builder, Value *Loaded) {
     733             :     return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded,
     734             :                                  ValOperand_Shifted, AI->getValOperand(), PMV);
     735             :   };
     736           0 : 
     737             :   // TODO: When we're ready to support LLSC conversions too, use
     738             :   // insertRMWLLSCLoop here for ExpansionKind==LLSC.
     739             :   Value *OldResult =
     740           0 :       insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder,
     741             :                            PerformPartwordOp, createCmpXchgInstFun);
     742           0 :   Value *FinalOldResult = Builder.CreateTrunc(
     743             :       Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
     744             :   AI->replaceAllUsesWith(FinalOldResult);
     745             :   AI->eraseFromParent();
     746           0 : }
     747             : 
     748             : // Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
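                      : // For example (illustrative): with a 32-bit minimum cmpxchg width,
                      : //     atomicrmw and i8* %p, i8 %v monotonic
                      : // becomes an i32 atomicrmw on the containing word whose operand is
                      : // (zext(%v) << ShiftAmt) | Inv_Mask, so the bytes outside the i8 are ANDed
                      : // with all-ones and left unchanged.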
     749           0 : AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
     750           0 :   IRBuilder<> Builder(AI);
     751             :   AtomicRMWInst::BinOp Op = AI->getOperation();
     752             : 
     753             :   assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
     754             :           Op == AtomicRMWInst::And) &&
     755           0 :          "Unable to widen operation");
     756             : 
     757             :   PartwordMaskValues PMV =
     758             :       createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
     759             :                        TLI->getMinCmpXchgSizeInBits() / 8);
     760           0 : 
     761             :   Value *ValOperand_Shifted =
     762           0 :       Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
     763             :                         PMV.ShiftAmt, "ValOperand_Shifted");
     764           0 : 
     765           0 :   Value *NewOperand;
     766           0 : 
     767             :   if (Op == AtomicRMWInst::And)
     768             :     NewOperand =
     769           0 :         Builder.CreateOr(PMV.Inv_Mask, ValOperand_Shifted, "AndOperand");
     770           0 :   else
     771             :     NewOperand = ValOperand_Shifted;
     772             : 
     773             :   AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(Op, PMV.AlignedAddr,
     774             :                                                  NewOperand, AI->getOrdering());
     775             : 
     776             :   Value *FinalOldResult = Builder.CreateTrunc(
     777             :       Builder.CreateLShr(NewAI, PMV.ShiftAmt), PMV.ValueType);
     778             :   AI->replaceAllUsesWith(FinalOldResult);
     779           0 :   AI->eraseFromParent();
     780             :   return NewAI;
     781             : }
     782           0 : 
     783             : void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
     784             :   // The basic idea here is that we're expanding a cmpxchg of a
     785             :   // smaller memory size up to a word-sized cmpxchg. To do this, we
     786             :   // need to add a retry-loop for strong cmpxchg, so that
     787           0 :   // modifications to other parts of the word don't cause a spurious
     788             :   // failure.
     789           0 : 
     790             :   // This generates code like the following:
     791             :   //     [[Setup mask values PMV.*]]
     792             :   //     %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
     793           0 :   //     %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
     794             :   //     %InitLoaded = load i32* %addr
     795             :   //     %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
     796           0 :   //     br partword.cmpxchg.loop
     797             :   // partword.cmpxchg.loop:
     798           0 :   //     %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
     799           0 :   //        [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
     800           0 :   //     %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
     801             :   //     %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
     802             :   //     %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
     803           0 :   //        i32 %FullWord_NewVal success_ordering failure_ordering
     804             :   //     %OldVal = extractvalue { i32, i1 } %NewCI, 0
     805             :   //     %Success = extractvalue { i32, i1 } %NewCI, 1
     806             :   //     br i1 %Success, label %partword.cmpxchg.end,
     807             :   //        label %partword.cmpxchg.failure
     808             :   // partword.cmpxchg.failure:
     809             :   //     %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
     810             :   //     %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
     811             :   //     br i1 %ShouldContinue, label %partword.cmpxchg.loop,
     812             :   //         label %partword.cmpxchg.end
     813             :   // partword.cmpxchg.end:
     814             :   //    %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
     815             :   //    %FinalOldVal = trunc i32 %tmp1 to i8
     816             :   //    %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
     817             :   //    %Res = insertvalue { i8, i1 } %25, i1 %Success, 1
     818             : 
     819             :   Value *Addr = CI->getPointerOperand();
     820             :   Value *Cmp = CI->getCompareOperand();
     821             :   Value *NewVal = CI->getNewValOperand();
     822             : 
     823             :   BasicBlock *BB = CI->getParent();
     824             :   Function *F = BB->getParent();
     825             :   IRBuilder<> Builder(CI);
     826             :   LLVMContext &Ctx = Builder.getContext();
     827             : 
     828             :   const int WordSize = TLI->getMinCmpXchgSizeInBits() / 8;
     829             : 
     830             :   BasicBlock *EndBB =
     831             :       BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
     832             :   auto FailureBB =
     833             :       BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
     834             :   auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
     835             : 
     836             :   // The split call above "helpfully" added a branch at the end of BB
     837             :   // (to the wrong place).
     838             :   std::prev(BB->end())->eraseFromParent();
     839             :   Builder.SetInsertPoint(BB);
     840             : 
     841             :   PartwordMaskValues PMV = createMaskInstrs(
     842             :       Builder, CI, CI->getCompareOperand()->getType(), Addr, WordSize);
     843           0 : 
     844           0 :   // Shift the incoming values over, into the right location in the word.
     845           0 :   Value *NewVal_Shifted =
     846           0 :       Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
     847             :   Value *Cmp_Shifted =
     848           0 :       Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
     849             : 
     850             :   // Load the entire current word, and mask into place the expected and new
     851           0 :   // values
     852             :   LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
     853           0 :   InitLoaded->setVolatile(CI->isVolatile());
     854           0 :   Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
     855             :   Builder.CreateBr(LoopBB);
     856             : 
     857             :   // partword.cmpxchg.loop:
     858           0 :   Builder.SetInsertPoint(LoopBB);
     859             :   PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
     860             :   Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
     861             : 
     862           0 :   // Mask/Or the expected and new values into place in the loaded word.
     863             :   Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
     864             :   Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
     865             :   AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
     866           0 :       PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, CI->getSuccessOrdering(),
     867             :       CI->getFailureOrdering(), CI->getSyncScopeID());
     868           0 :   NewCI->setVolatile(CI->isVolatile());
      869             :   // When we're building a strong cmpxchg, we need a loop, so you
      870             :   // might think we could use a weak cmpxchg inside. But using a strong
      871             :   // one allows the comparison below for ShouldContinue, and we expect
      872             :   // the underlying cmpxchg to be a machine instruction, which is
      873             :   // strong anyway.
     874           0 :   NewCI->setWeak(CI->isWeak());
     875           0 : 
     876             :   Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
     877             :   Value *Success = Builder.CreateExtractValue(NewCI, 1);
     878             : 
     879           0 :   if (CI->isWeak())
     880           0 :     Builder.CreateBr(EndBB);
     881             :   else
     882             :     Builder.CreateCondBr(Success, EndBB, FailureBB);
     883           0 : 
     884           0 :   // partword.cmpxchg.failure:
     885           0 :   Builder.SetInsertPoint(FailureBB);
      886             :   // Upon failure, check whether the masked-out part of the loaded value was
      887             :   // modified.  If it was not, the masked-in part must have been, so the
      888             :   // cmpxchg genuinely failed and we exit; otherwise we retry.
     889             :   Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
     890             :   Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
     891             :   Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
     892             : 
     893             :   // Add the second value to the phi from above
     894             :   Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
     895             : 
     896           0 :   // partword.cmpxchg.end:
     897           0 :   Builder.SetInsertPoint(CI);
     898             : 
     899           0 :   Value *FinalOldVal = Builder.CreateTrunc(
     900           0 :       Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType);
     901             :   Value *Res = UndefValue::get(CI->getType());
     902           0 :   Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
     903             :   Res = Builder.CreateInsertValue(Res, Success, 1);
     904             : 
     905             :   CI->replaceAllUsesWith(Res);
     906             :   CI->eraseFromParent();
     907             : }
     908             : 
     909           0 : void AtomicExpand::expandAtomicOpToLLSC(
     910           0 :     Instruction *I, Type *ResultType, Value *Addr, AtomicOrdering MemOpOrder,
     911           0 :     function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
     912             :   IRBuilder<> Builder(I);
     913             :   Value *Loaded =
     914           0 :       insertRMWLLSCLoop(Builder, ResultType, Addr, MemOpOrder, PerformOp);
     915             : 
     916             :   I->replaceAllUsesWith(Loaded);
     917           0 :   I->eraseFromParent();
     918             : }
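                      : 
                      : // A typical PerformOp callback just materializes the RMW operation on the
                      : // loaded value. For illustration only (a hypothetical caller expanding an
                      : // 'atomicrmw add', where AI is the AtomicRMWInst being expanded):
                      : //
                      : //   expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
                      : //                        AI->getOrdering(),
                      : //                        [&](IRBuilder<> &Builder, Value *Loaded) {
                      : //                          return Builder.CreateAdd(Loaded,
                      : //                                                   AI->getValOperand());
                      : //                        });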
     919           0 : 
     920             : void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
     921           0 :   IRBuilder<> Builder(AI);
     922           0 : 
     923           0 :   PartwordMaskValues PMV =
     924             :       createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
     925           0 :                        TLI->getMinCmpXchgSizeInBits() / 8);
     926           0 : 
     927           0 :   // The value operand must be sign-extended for signed min/max so that the
     928             :   // target's signed comparison instructions can be used. Otherwise, just
     929           0 :   // zero-ext.
     930             :   Instruction::CastOps CastOp = Instruction::ZExt;
     931             :   AtomicRMWInst::BinOp RMWOp = AI->getOperation();
     932           0 :   if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
     933             :     CastOp = Instruction::SExt;
     934           0 : 
     935             :   Value *ValOperand_Shifted = Builder.CreateShl(
     936           0 :       Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
     937           0 :       PMV.ShiftAmt, "ValOperand_Shifted");
     938           0 :   Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
     939             :       Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
     940           0 :       AI->getOrdering());
     941           0 :   Value *FinalOldResult = Builder.CreateTrunc(
     942             :       Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
     943             :   AI->replaceAllUsesWith(FinalOldResult);
     944             :   AI->eraseFromParent();
     945           0 : }
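                      : 
                      : // As a concrete sketch (assuming a little-endian target whose minimum
                      : // cmpxchg width is 32 bits), an 'atomicrmw add i8* %p, i8 %v ordering' is
                      : // rewritten so the target intrinsic operates on the containing aligned i32
                      : // word:
                      : //     %v.ext   = zext i8 %v to i32        ; sext instead for signed min/max
                      : //     %v.shift = shl i32 %v.ext, %ShiftAmt
                      : //     %old     = <target masked atomicrmw intrinsic>(%AlignedAddr, %v.shift,
                      : //                                                    %Mask, %ShiftAmt, ordering)
                      : //     %old.sh  = lshr i32 %old, %ShiftAmt
                      : //     %res     = trunc i32 %old.sh to i8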
     946             : 
     947             : Value *AtomicExpand::insertRMWLLSCLoop(
     948             :     IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
     949             :     AtomicOrdering MemOpOrder,
     950             :     function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
     951             :   LLVMContext &Ctx = Builder.getContext();
     952           0 :   BasicBlock *BB = Builder.GetInsertBlock();
     953             :   Function *F = BB->getParent();
     954             : 
     955           0 :   // Given: atomicrmw some_op iN* %addr, iN %incr ordering
     956             :   //
     957             :   // The standard expansion we produce is:
     958           0 :   //     [...]
     959             :   // atomicrmw.start:
     960           0 :   //     %loaded = @load.linked(%addr)
     961           0 :   //     %new = some_op iN %loaded, %incr
     962             :   //     %stored = @store_conditional(%new, %addr)
      963           0 :   //     %try_again = icmp ne i32 %stored, 0
      964           0 :   //     br i1 %try_again, label %atomicrmw.start, label %atomicrmw.end
     965           0 :   // atomicrmw.end:
     966             :   //     [...]
     967           0 :   BasicBlock *ExitBB =
     968             :       BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
     969             :   BasicBlock *LoopBB =  BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
     970             : 
     971           0 :   // The split call above "helpfully" added a branch at the end of BB (to the
     972           0 :   // wrong place).
     973           0 :   std::prev(BB->end())->eraseFromParent();
     974             :   Builder.SetInsertPoint(BB);
     975             :   Builder.CreateBr(LoopBB);
     976             : 
     977             :   // Start the main loop block now that we've taken care of the preliminaries.
     978             :   Builder.SetInsertPoint(LoopBB);
     979             :   Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
     980             : 
     981             :   Value *NewVal = PerformOp(Builder, Loaded);
     982             : 
     983             :   Value *StoreSuccess =
     984             :       TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
     985             :   Value *TryAgain = Builder.CreateICmpNE(
     986             :       StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
     987             :   Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
     988           0 : 
     989           0 :   Builder.SetInsertPoint(ExitBB, ExitBB->begin());
     990             :   return Loaded;
     991             : }
     992             : 
     993           0 : /// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
     994           0 : /// the equivalent bitwidth.  We used to not support pointer cmpxchg in the
     995           0 : /// IR.  As a migration step, we convert back to what use to be the standard
      996             : /// IR.  As a migration step, we convert back to what used to be the standard
     997             : /// one.
     998             : AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
     999           0 :   auto *M = CI->getModule();
    1000             :   Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
    1001           0 :                                             M->getDataLayout());
    1002             : 
    1003             :   IRBuilder<> Builder(CI);
    1004           0 : 
    1005           0 :   Value *Addr = CI->getPointerOperand();
    1006           0 :   Type *PT = PointerType::get(NewTy,
    1007           0 :                               Addr->getType()->getPointerAddressSpace());
    1008             :   Value *NewAddr = Builder.CreateBitCast(Addr, PT);
    1009           0 : 
    1010           0 :   Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
    1011             :   Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
    1012             : 
    1013             : 
    1014             :   auto *NewCI = Builder.CreateAtomicCmpXchg(NewAddr, NewCmp, NewNewVal,
    1015             :                                             CI->getSuccessOrdering(),
    1016             :                                             CI->getFailureOrdering(),
    1017             :                                             CI->getSyncScopeID());
    1018          10 :   NewCI->setVolatile(CI->isVolatile());
    1019          10 :   NewCI->setWeak(CI->isWeak());
    1020          20 :   LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
    1021             : 
    1022             :   Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
    1023          10 :   Value *Succ = Builder.CreateExtractValue(NewCI, 1);
    1024             : 
    1025             :   OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
    1026          10 : 
    1027             :   Value *Res = UndefValue::get(CI->getType());
    1028          10 :   Res = Builder.CreateInsertValue(Res, OldVal, 0);
    1029             :   Res = Builder.CreateInsertValue(Res, Succ, 1);
    1030          10 : 
    1031          10 :   CI->replaceAllUsesWith(Res);
    1032             :   CI->eraseFromParent();
    1033             :   return NewCI;
    1034          10 : }
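                      : 
                      : // For example (a sketch assuming 64-bit pointers), a pointer cmpxchg such as
                      : //     %pair = cmpxchg i8** %addr, i8* %old, i8* %new seq_cst seq_cst
                      : // becomes roughly:
                      : //     %addr.cast = bitcast i8** %addr to i64*
                      : //     %old.int   = ptrtoint i8* %old to i64
                      : //     %new.int   = ptrtoint i8* %new to i64
                      : //     %pair.int  = cmpxchg i64* %addr.cast, i64 %old.int, i64 %new.int
                      : //                          seq_cst seq_cst
                      : //     %val.int   = extractvalue { i64, i1 } %pair.int, 0
                      : //     %val       = inttoptr i64 %val.int to i8*
                      : // with the original { i8*, i1 } result rebuilt from %val and the success bit.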
    1035             : 
    1036             : bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
    1037          10 :   AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
    1038             :   AtomicOrdering FailureOrder = CI->getFailureOrdering();
    1039             :   Value *Addr = CI->getPointerOperand();
    1040             :   BasicBlock *BB = CI->getParent();
    1041             :   Function *F = BB->getParent();
    1042          20 :   LLVMContext &Ctx = F->getContext();
    1043          20 :   // If shouldInsertFencesForAtomic() returns true, then the target does not
    1044             :   // want to deal with memory orders, and emitLeading/TrailingFence should take
     1045          10 :   // care of everything. Otherwise, emitLeading/TrailingFence are no-ops and we
    1046             :   // should preserve the ordering.
    1047          10 :   bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
    1048          20 :   AtomicOrdering MemOpOrder =
    1049          20 :       ShouldInsertFencesForAtomic ? AtomicOrdering::Monotonic : SuccessOrder;
    1050             : 
    1051          10 :   // In implementations which use a barrier to achieve release semantics, we can
    1052          10 :   // delay emitting this barrier until we know a store is actually going to be
    1053          10 :   // attempted. The cost of this delay is that we need 2 copies of the block
    1054             :   // emitting the load-linked, affecting code size.
    1055             :   //
    1056           0 :   // Ideally, this logic would be unconditional except for the minsize check
    1057             :   // since in other cases the extra blocks naturally collapse down to the
    1058             :   // minimal loop. Unfortunately, this puts too much stress on later
    1059             :   // optimisations so we avoid emitting the extra logic in those cases too.
    1060           0 :   bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
    1061           0 :                            SuccessOrder != AtomicOrdering::Monotonic &&
    1062           0 :                            SuccessOrder != AtomicOrdering::Acquire &&
    1063             :                            !F->optForMinSize();
    1064             : 
    1065             :   // There's no overhead for sinking the release barrier in a weak cmpxchg, so
    1066             :   // do it even on minsize.
    1067           0 :   bool UseUnconditionalReleaseBarrier = F->optForMinSize() && !CI->isWeak();
    1068             : 
    1069           0 :   // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
    1070             :   //
    1071             :   // The full expansion we produce is:
    1072             :   //     [...]
    1073             :   // cmpxchg.start:
    1074             :   //     %unreleasedload = @load.linked(%addr)
    1075             :   //     %should_store = icmp eq %unreleasedload, %desired
    1076             :   //     br i1 %should_store, label %cmpxchg.fencedstore,
    1077             :   //                          label %cmpxchg.nostore
     1078             :   // cmpxchg.fencedstore:
    1079             :   //     fence?
    1080           0 :   //     br label cmpxchg.trystore
    1081           0 :   // cmpxchg.trystore:
     1082           0 :   //     %loaded.trystore = phi [%unreleasedload, %cmpxchg.fencedstore],
    1083             :   //                            [%releasedload, %cmpxchg.releasedload]
    1084             :   //     %stored = @store_conditional(%new, %addr)
    1085             :   //     %success = icmp eq i32 %stored, 0
    1086             :   //     br i1 %success, label %cmpxchg.success,
    1087           0 :   //                     label %cmpxchg.releasedload/%cmpxchg.failure
    1088             :   // cmpxchg.releasedload:
    1089             :   //     %releasedload = @load.linked(%addr)
    1090             :   //     %should_store = icmp eq %releasedload, %desired
    1091             :   //     br i1 %should_store, label %cmpxchg.trystore,
    1092             :   //                          label %cmpxchg.failure
    1093             :   // cmpxchg.success:
    1094             :   //     fence?
    1095             :   //     br label %cmpxchg.end
    1096             :   // cmpxchg.nostore:
    1097             :   //     %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
    1098             :   //                           [%releasedload,
    1099             :   //                               %cmpxchg.releasedload/%cmpxchg.trystore]
    1100             :   //     @load_linked_fail_balance()?
    1101             :   //     br label %cmpxchg.failure
    1102             :   // cmpxchg.failure:
    1103             :   //     fence?
    1104             :   //     br label %cmpxchg.end
    1105             :   // cmpxchg.end:
    1106             :   //     %loaded = phi [%loaded.nostore, %cmpxchg.failure],
    1107             :   //                   [%loaded.trystore, %cmpxchg.trystore]
    1108             :   //     %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
    1109             :   //     %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
    1110             :   //     %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
    1111             :   //     [...]
    1112             :   BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
    1113             :   auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
    1114             :   auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
    1115             :   auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
    1116             :   auto ReleasedLoadBB =
    1117             :       BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
    1118             :   auto TryStoreBB =
    1119             :       BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
    1120             :   auto ReleasingStoreBB =
    1121             :       BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
    1122             :   auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
    1123             : 
    1124             :   // This grabs the DebugLoc from CI
    1125             :   IRBuilder<> Builder(CI);
    1126             : 
    1127             :   // The split call above "helpfully" added a branch at the end of BB (to the
    1128             :   // wrong place), but we might want a fence too. It's easiest to just remove
    1129             :   // the branch entirely.
    1130             :   std::prev(BB->end())->eraseFromParent();
    1131             :   Builder.SetInsertPoint(BB);
    1132           0 :   if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
    1133           0 :     TLI->emitLeadingFence(Builder, CI, SuccessOrder);
    1134           0 :   Builder.CreateBr(StartBB);
    1135           0 : 
    1136             :   // Start the main loop block now that we've taken care of the preliminaries.
    1137           0 :   Builder.SetInsertPoint(StartBB);
    1138             :   Value *UnreleasedLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
    1139           0 :   Value *ShouldStore = Builder.CreateICmpEQ(
    1140             :       UnreleasedLoad, CI->getCompareOperand(), "should_store");
    1141           0 : 
    1142           0 :   // If the cmpxchg doesn't actually need any ordering when it fails, we can
    1143             :   // jump straight past that fence instruction (if it exists).
    1144             :   Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);
    1145           0 : 
    1146             :   Builder.SetInsertPoint(ReleasingStoreBB);
    1147             :   if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
    1148             :     TLI->emitLeadingFence(Builder, CI, SuccessOrder);
    1149             :   Builder.CreateBr(TryStoreBB);
    1150           0 : 
    1151             :   Builder.SetInsertPoint(TryStoreBB);
    1152           0 :   Value *StoreSuccess = TLI->emitStoreConditional(
    1153           0 :       Builder, CI->getNewValOperand(), Addr, MemOpOrder);
    1154           0 :   StoreSuccess = Builder.CreateICmpEQ(
    1155             :       StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
    1156             :   BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
    1157             :   Builder.CreateCondBr(StoreSuccess, SuccessBB,
    1158           0 :                        CI->isWeak() ? FailureBB : RetryBB);
    1159           0 : 
    1160             :   Builder.SetInsertPoint(ReleasedLoadBB);
    1161             :   Value *SecondLoad;
    1162             :   if (HasReleasedLoadBB) {
    1163             :     SecondLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
    1164           0 :     ShouldStore = Builder.CreateICmpEQ(SecondLoad, CI->getCompareOperand(),
    1165             :                                        "should_store");
    1166             : 
    1167           0 :     // If the cmpxchg doesn't actually need any ordering when it fails, we can
    1168           0 :     // jump straight past that fence instruction (if it exists).
    1169           0 :     Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
    1170             :   } else
    1171             :     Builder.CreateUnreachable();
    1172           0 : 
    1173           0 :   // Make sure later instructions don't get reordered with a fence if
    1174           0 :   // necessary.
    1175           0 :   Builder.SetInsertPoint(SuccessBB);
    1176           0 :   if (ShouldInsertFencesForAtomic)
    1177           0 :     TLI->emitTrailingFence(Builder, CI, SuccessOrder);
    1178             :   Builder.CreateBr(ExitBB);
    1179             : 
    1180             :   Builder.SetInsertPoint(NoStoreBB);
    1181             :   // In the failing case, where we don't execute the store-conditional, the
    1182           0 :   // target might want to balance out the load-linked with a dedicated
    1183           0 :   // instruction (e.g., on ARM, clearing the exclusive monitor).
    1184           0 :   TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
    1185             :   Builder.CreateBr(FailureBB);
    1186             : 
    1187             :   Builder.SetInsertPoint(FailureBB);
    1188             :   if (ShouldInsertFencesForAtomic)
    1189           0 :     TLI->emitTrailingFence(Builder, CI, FailureOrder);
    1190             :   Builder.CreateBr(ExitBB);
    1191           0 : 
    1192             :   // Finally, we have control-flow based knowledge of whether the cmpxchg
    1193             :   // succeeded or not. We expose this to later passes by converting any
    1194             :   // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
    1195             :   // PHI.
    1196           0 :   Builder.SetInsertPoint(ExitBB, ExitBB->begin());
    1197           0 :   PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
    1198           0 :   Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
    1199             :   Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
    1200             : 
    1201             :   // Setup the builder so we can create any PHIs we need.
    1202             :   Value *Loaded;
    1203             :   if (!HasReleasedLoadBB)
    1204           0 :     Loaded = UnreleasedLoad;
    1205           0 :   else {
    1206             :     Builder.SetInsertPoint(TryStoreBB, TryStoreBB->begin());
    1207             :     PHINode *TryStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
    1208           0 :     TryStoreLoaded->addIncoming(UnreleasedLoad, ReleasingStoreBB);
    1209           0 :     TryStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
    1210           0 : 
    1211             :     Builder.SetInsertPoint(NoStoreBB, NoStoreBB->begin());
    1212             :     PHINode *NoStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
    1213             :     NoStoreLoaded->addIncoming(UnreleasedLoad, StartBB);
    1214             :     NoStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
    1215             : 
    1216           0 :     Builder.SetInsertPoint(ExitBB, ++ExitBB->begin());
    1217           0 :     PHINode *ExitLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
    1218           0 :     ExitLoaded->addIncoming(TryStoreLoaded, SuccessBB);
    1219           0 :     ExitLoaded->addIncoming(NoStoreLoaded, FailureBB);
    1220             : 
    1221             :     Loaded = ExitLoaded;
    1222             :   }
    1223           0 : 
    1224             :   // Look for any users of the cmpxchg that are just comparing the loaded value
    1225             :   // against the desired one, and replace them with the CFG-derived version.
    1226           0 :   SmallVector<ExtractValueInst *, 2> PrunedInsts;
    1227           0 :   for (auto User : CI->users()) {
    1228           0 :     ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
    1229           0 :     if (!EV)
    1230             :       continue;
    1231           0 : 
    1232           0 :     assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
    1233           0 :            "weird extraction from { iN, i1 }");
    1234           0 : 
    1235             :     if (EV->getIndices()[0] == 0)
    1236           0 :       EV->replaceAllUsesWith(Loaded);
    1237           0 :     else
    1238           0 :       EV->replaceAllUsesWith(Success);
    1239           0 : 
    1240             :     PrunedInsts.push_back(EV);
    1241             :   }
    1242             : 
     1243             :   // We can remove the instructions now that we're no longer iterating over them.
    1244             :   for (auto EV : PrunedInsts)
    1245             :     EV->eraseFromParent();
    1246             : 
    1247           0 :   if (!CI->use_empty()) {
    1248           0 :     // Some use of the full struct return that we don't understand has happened,
    1249           0 :     // so we've got to reconstruct it properly.
    1250           0 :     Value *Res;
    1251             :     Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
    1252             :     Res = Builder.CreateInsertValue(Res, Success, 1);
    1253             : 
    1254             :     CI->replaceAllUsesWith(Res);
    1255           0 :   }
    1256           0 : 
    1257             :   CI->eraseFromParent();
    1258           0 :   return true;
    1259             : }
    1260           0 : 
     1261             : bool AtomicExpand::isIdempotentRMW(AtomicRMWInst *RMWI) {
     1262             :   auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
     1263             :   if (!C)
    1264           0 :     return false;
    1265           0 : 
    1266             :   AtomicRMWInst::BinOp Op = RMWI->getOperation();
     1267           0 :   switch (Op) {
    1268             :     case AtomicRMWInst::Add:
    1269             :     case AtomicRMWInst::Sub:
    1270             :     case AtomicRMWInst::Or:
    1271           0 :     case AtomicRMWInst::Xor:
    1272           0 :       return C->isZero();
    1273             :     case AtomicRMWInst::And:
    1274           0 :       return C->isMinusOne();
     1275             :     // FIXME: we could also treat Min/Max/UMin/UMax using the INT_MIN/INT_MAX/...
    1276             :     default:
    1277           0 :       return false;
    1278           0 :   }
    1279             : }
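                      : 
                      : // For example, 'atomicrmw or i32* %p, i32 0 ordering' and
                      : // 'atomicrmw and i32* %p, i32 -1 ordering' leave the stored value unchanged,
                      : // so when the target's lowerIdempotentRMWIntoFencedLoad hook (used below)
                      : // accepts them they can be replaced with a suitably fenced atomic load.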
    1280             : 
     1281           0 : bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
    1282             :   if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
    1283             :     tryExpandAtomicLoad(ResultingLoad);
    1284           0 :     return true;
    1285             :   }
    1286             :   return false;
    1287           0 : }
    1288             : 
    1289             : Value *AtomicExpand::insertRMWCmpXchgLoop(
    1290             :     IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
    1291             :     AtomicOrdering MemOpOrder,
    1292             :     function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
    1293             :     CreateCmpXchgInstFun CreateCmpXchg) {
    1294             :   LLVMContext &Ctx = Builder.getContext();
    1295             :   BasicBlock *BB = Builder.GetInsertBlock();
    1296             :   Function *F = BB->getParent();
    1297             : 
    1298             :   // Given: atomicrmw some_op iN* %addr, iN %incr ordering
    1299             :   //
    1300             :   // The standard expansion we produce is:
    1301          23 :   //     [...]
    1302          23 :   //     %init_loaded = load atomic iN* %addr
    1303          20 :   //     br label %loop
    1304          20 :   // loop:
    1305             :   //     %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
    1306             :   //     %new = some_op iN %loaded, %incr
    1307             :   //     %pair = cmpxchg iN* %addr, iN %loaded, iN %new
    1308             :   //     %new_loaded = extractvalue { iN, i1 } %pair, 0
    1309        1656 :   //     %success = extractvalue { iN, i1 } %pair, 1
    1310             :   //     br i1 %success, label %atomicrmw.end, label %loop
    1311             :   // atomicrmw.end:
    1312             :   //     [...]
    1313             :   BasicBlock *ExitBB =
    1314        1656 :       BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
    1315        1656 :   BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
    1316        1656 : 
    1317             :   // The split call above "helpfully" added a branch at the end of BB (to the
    1318             :   // wrong place), but we want a load. It's easiest to just remove
    1319             :   // the branch entirely.
    1320             :   std::prev(BB->end())->eraseFromParent();
    1321             :   Builder.SetInsertPoint(BB);
    1322             :   LoadInst *InitLoaded = Builder.CreateLoad(ResultTy, Addr);
    1323             :   // Atomics require at least natural alignment.
    1324             :   InitLoaded->setAlignment(ResultTy->getPrimitiveSizeInBits() / 8);
    1325             :   Builder.CreateBr(LoopBB);
    1326             : 
    1327             :   // Start the main loop block now that we've taken care of the preliminaries.
    1328             :   Builder.SetInsertPoint(LoopBB);
    1329             :   PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
    1330             :   Loaded->addIncoming(InitLoaded, BB);
    1331             : 
    1332             :   Value *NewVal = PerformOp(Builder, Loaded);
    1333             : 
    1334        1656 :   Value *NewLoaded = nullptr;
    1335        1656 :   Value *Success = nullptr;
    1336             : 
    1337             :   CreateCmpXchg(Builder, Addr, Loaded, NewVal,
    1338             :                 MemOpOrder == AtomicOrdering::Unordered
    1339             :                     ? AtomicOrdering::Monotonic
    1340        1656 :                     : MemOpOrder,
    1341        1656 :                 Success, NewLoaded);
    1342        1656 :   assert(Success && NewLoaded);
    1343             : 
    1344        1656 :   Loaded->addIncoming(NewLoaded, LoopBB);
    1345        1656 : 
    1346             :   Builder.CreateCondBr(Success, ExitBB, LoopBB);
    1347             : 
    1348             :   Builder.SetInsertPoint(ExitBB, ExitBB->begin());
    1349        1656 :   return NewLoaded;
    1350        1656 : }
    1351             : 
    1352        1656 : bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
    1353             :   unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
    1354        1656 :   unsigned ValueSize = getAtomicOpSize(CI);
    1355        1656 : 
    1356             :   switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
    1357        1657 :   default:
    1358             :     llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
    1359             :   case TargetLoweringBase::AtomicExpansionKind::None:
    1360             :     if (ValueSize < MinCASSize)
    1361             :       expandPartwordCmpXchg(CI);
    1362             :     return false;
    1363             :   case TargetLoweringBase::AtomicExpansionKind::LLSC: {
    1364        1656 :     assert(ValueSize >= MinCASSize &&
    1365             :            "MinCmpXchgSizeInBits not yet supported for LL/SC expansions.");
    1366        1656 :     return expandAtomicCmpXchg(CI);
    1367             :   }
    1368        1656 :   case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
    1369        1656 :     llvm_unreachable(
     1370             :         "MaskedIntrinsic expansion of cmpxchg not yet implemented");
    1371             :   }
    1372        4548 : }
    1373        4548 : 
    1374        4548 : // Note: This function is exposed externally by AtomicExpandUtils.h
    1375             : bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
    1376        4548 :                                     CreateCmpXchgInstFun CreateCmpXchg) {
    1377           0 :   IRBuilder<> Builder(AI);
    1378           0 :   Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
    1379        4481 :       Builder, AI->getType(), AI->getPointerOperand(), AI->getOrdering(),
    1380        4481 :       [&](IRBuilder<> &Builder, Value *Loaded) {
    1381           4 :         return performAtomicOp(AI->getOperation(), Builder, Loaded,
    1382             :                                AI->getValOperand());
    1383          67 :       },
    1384             :       CreateCmpXchg);
    1385             : 
    1386          67 :   AI->replaceAllUsesWith(Loaded);
    1387             :   AI->eraseFromParent();
    1388             :   return true;
    1389             : }
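                      : 
                      : // A minimal CreateCmpXchgInstFun, sketched here purely for illustration under
                      : // the assumption that the target is happy with a plain cmpxchg instruction
                      : // (expandAtomicRMWToLibcall below passes a variant that additionally expands
                      : // the resulting cmpxchg into a libcall):
                      : //
                      : //   [](IRBuilder<> &Builder, Value *Addr, Value *Loaded, Value *NewVal,
                      : //      AtomicOrdering MemOpOrder, Value *&Success, Value *&NewLoaded) {
                      : //     AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
                      : //         Addr, Loaded, NewVal, MemOpOrder,
                      : //         AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
                      : //     Success = Builder.CreateExtractValue(Pair, 1, "success");
                      : //     NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
                      : //   };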
    1390             : 
    1391             : // In order to use one of the sized library calls such as
    1392             : // __atomic_fetch_add_4, the alignment must be sufficient, the size
    1393             : // must be one of the potentially-specialized sizes, and the value
    1394             : // type must actually exist in C on the target (otherwise, the
     1395        1650 : // function wouldn't actually be defined).
    1396             : static bool canUseSizedAtomicCall(unsigned Size, unsigned Align,
    1397        1650 :                                   const DataLayout &DL) {
    1398        3300 :   // TODO: "LargestSize" is an approximation for "largest type that
    1399             :   // you can express in C". It seems to be the case that int128 is
    1400             :   // supported on all 64-bit platforms, otherwise only up to 64-bit
    1401             :   // integers are supported. If we get this wrong, then we'll try to
    1402             :   // call a sized libcall that doesn't actually exist. There should
    1403             :   // really be some more reliable way in LLVM of determining integer
    1404             :   // sizes which are valid in the target's C ABI...
    1405             :   unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
    1406        1650 :   return Align >= Size &&
    1407        1650 :          (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
    1408        1650 :          Size <= LargestSize;
    1409             : }
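                      : 
                      : // For example, under this rule a naturally aligned 4-byte operation can use
                      : // __atomic_fetch_add_4, while an under-aligned or oddly sized one (say, 3
                      : // bytes) must go through the generic __atomic_* entry points, which take an
                      : // explicit size and work through pointers.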
    1410             : 
    1411             : void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) {
    1412             :   static const RTLIB::Libcall Libcalls[6] = {
    1413             :       RTLIB::ATOMIC_LOAD,   RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
    1414             :       RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
    1415             :   unsigned Size = getAtomicOpSize(I);
    1416          17 :   unsigned Align = getAtomicOpAlign(I);
    1417             : 
    1418             :   bool expanded = expandAtomicOpToLibcall(
    1419             :       I, Size, Align, I->getPointerOperand(), nullptr, nullptr,
    1420             :       I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
    1421             :   (void)expanded;
     1422             :   assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Load");
    1423             : }
    1424             : 
    1425          17 : void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
    1426          15 :   static const RTLIB::Libcall Libcalls[6] = {
    1427          17 :       RTLIB::ATOMIC_STORE,   RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
    1428          17 :       RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
    1429             :   unsigned Size = getAtomicOpSize(I);
    1430             :   unsigned Align = getAtomicOpAlign(I);
    1431           4 : 
    1432             :   bool expanded = expandAtomicOpToLibcall(
    1433             :       I, Size, Align, I->getPointerOperand(), I->getValueOperand(), nullptr,
    1434             :       I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
    1435           4 :   (void)expanded;
     1436             :   assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Store");
    1437             : }
    1438           4 : 
    1439             : void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
    1440             :   static const RTLIB::Libcall Libcalls[6] = {
    1441             :       RTLIB::ATOMIC_COMPARE_EXCHANGE,   RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
    1442             :       RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
    1443           4 :       RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
    1444             :   unsigned Size = getAtomicOpSize(I);
    1445           5 :   unsigned Align = getAtomicOpAlign(I);
    1446             : 
    1447             :   bool expanded = expandAtomicOpToLibcall(
    1448             :       I, Size, Align, I->getPointerOperand(), I->getNewValOperand(),
    1449           5 :       I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
    1450             :       Libcalls);
    1451             :   (void)expanded;
     1452             :   assert(expanded && "expandAtomicOpToLibcall shouldn't fail for CAS");
    1453             : }
    1454             : 
    1455             : static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
    1456             :   static const RTLIB::Libcall LibcallsXchg[6] = {
    1457           5 :       RTLIB::ATOMIC_EXCHANGE,   RTLIB::ATOMIC_EXCHANGE_1,
    1458             :       RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
    1459           4 :       RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
    1460             :   static const RTLIB::Libcall LibcallsAdd[6] = {
    1461             :       RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_ADD_1,
    1462             :       RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
    1463             :       RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
    1464           4 :   static const RTLIB::Libcall LibcallsSub[6] = {
    1465           4 :       RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_SUB_1,
    1466             :       RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
    1467           4 :       RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
    1468             :   static const RTLIB::Libcall LibcallsAnd[6] = {
    1469             :       RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_AND_1,
    1470             :       RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
    1471             :       RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
    1472             :   static const RTLIB::Libcall LibcallsOr[6] = {
    1473           4 :       RTLIB::UNKNOWN_LIBCALL,   RTLIB::ATOMIC_FETCH_OR_1,
    1474             :       RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
    1475           4 :       RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
    1476             :   static const RTLIB::Libcall LibcallsXor[6] = {
    1477             :       RTLIB::UNKNOWN_LIBCALL,    RTLIB::ATOMIC_FETCH_XOR_1,
    1478             :       RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
    1479             :       RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
    1480             :   static const RTLIB::Libcall LibcallsNand[6] = {
    1481             :       RTLIB::UNKNOWN_LIBCALL,     RTLIB::ATOMIC_FETCH_NAND_1,
    1482             :       RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
    1483             :       RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
    1484             : 
    1485             :   switch (Op) {
    1486             :   case AtomicRMWInst::BAD_BINOP:
    1487             :     llvm_unreachable("Should not have BAD_BINOP.");
    1488             :   case AtomicRMWInst::Xchg:
    1489             :     return makeArrayRef(LibcallsXchg);
    1490             :   case AtomicRMWInst::Add:
    1491             :     return makeArrayRef(LibcallsAdd);
    1492             :   case AtomicRMWInst::Sub:
    1493             :     return makeArrayRef(LibcallsSub);
    1494             :   case AtomicRMWInst::And:
    1495             :     return makeArrayRef(LibcallsAnd);
    1496             :   case AtomicRMWInst::Or:
    1497             :     return makeArrayRef(LibcallsOr);
    1498             :   case AtomicRMWInst::Xor:
    1499             :     return makeArrayRef(LibcallsXor);
    1500             :   case AtomicRMWInst::Nand:
    1501             :     return makeArrayRef(LibcallsNand);
    1502             :   case AtomicRMWInst::Max:
    1503             :   case AtomicRMWInst::Min:
    1504             :   case AtomicRMWInst::UMax:
    1505           4 :   case AtomicRMWInst::UMin:
    1506             :     // No atomic libcalls are available for max/min/umax/umin.
    1507             :     return {};
    1508             :   }
    1509             :   llvm_unreachable("Unexpected AtomicRMW operation.");
    1510             : }
    1511             : 
    1512             : void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
    1513             :   ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
    1514             : 
    1515             :   unsigned Size = getAtomicOpSize(I);
    1516             :   unsigned Align = getAtomicOpAlign(I);
    1517             : 
    1518             :   bool Success = false;
    1519             :   if (!Libcalls.empty())
    1520             :     Success = expandAtomicOpToLibcall(
    1521             :         I, Size, Align, I->getPointerOperand(), I->getValOperand(), nullptr,
    1522           0 :         I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
    1523             : 
    1524             :   // The expansion failed: either there were no libcalls at all for
    1525             :   // the operation (min/max), or there were only size-specialized
    1526             :   // libcalls (add/sub/etc) and we needed a generic. So, expand to a
    1527           0 :   // CAS libcall, via a CAS loop, instead.
    1528             :   if (!Success) {
    1529           0 :     expandAtomicRMWToCmpXchg(I, [this](IRBuilder<> &Builder, Value *Addr,
    1530             :                                        Value *Loaded, Value *NewVal,
    1531             :                                        AtomicOrdering MemOpOrder,
    1532           4 :                                        Value *&Success, Value *&NewLoaded) {
    1533           4 :       // Create the CAS instruction normally...
    1534             :       AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
    1535           4 :           Addr, Loaded, NewVal, MemOpOrder,
    1536           4 :           AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
    1537             :       Success = Builder.CreateExtractValue(Pair, 1, "success");
    1538             :       NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
    1539           4 : 
    1540           4 :       // ...and then expand the CAS into a libcall.
    1541             :       expandAtomicCASToLibcall(Pair);
    1542             :     });
    1543             :   }
    1544             : }
    1545             : 
    1546             : // A helper routine for the above expandAtomic*ToLibcall functions.
    1547             : //
    1548           4 : // 'Libcalls' contains an array of enum values for the particular
    1549           2 : // ATOMIC libcalls to be emitted. All of the other arguments besides
    1550             : // 'I' are extracted from the Instruction subclass by the
    1551             : // caller. Depending on the particular call, some will be null.
    1552             : bool AtomicExpand::expandAtomicOpToLibcall(
    1553             :     Instruction *I, unsigned Size, unsigned Align, Value *PointerOperand,
    1554             :     Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
    1555             :     AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
    1556             :   assert(Libcalls.size() == 6);
    1557             : 
    1558             :   LLVMContext &Ctx = I->getContext();
    1559             :   Module *M = I->getModule();
    1560             :   const DataLayout &DL = M->getDataLayout();
    1561             :   IRBuilder<> Builder(I);
    1562             :   IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
    1563             : 
    1564           4 :   bool UseSizedLibcall = canUseSizedAtomicCall(Size, Align, DL);
    1565             :   Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
    1566             : 
    1567             :   unsigned AllocaAlignment = DL.getPrefTypeAlignment(SizedIntTy);
    1568             : 
    1569             :   // TODO: the "order" argument type is "int", not int32. So
    1570             :   // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
    1571             :   ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
    1572           0 :   assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
    1573             :   Constant *OrderingVal =
    1574             :       ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
    1575             :   Constant *Ordering2Val = nullptr;
    1576             :   if (CASExpected) {
    1577             :     assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
    1578           0 :     Ordering2Val =
    1579             :         ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
    1580           0 :   }
    1581           0 :   bool HasResult = I->getType() != Type::getVoidTy(Ctx);
    1582           0 : 
    1583             :   RTLIB::Libcall RTLibType;
    1584           0 :   if (UseSizedLibcall) {
    1585           0 :     switch (Size) {
    1586             :     case 1: RTLibType = Libcalls[1]; break;
    1587           0 :     case 2: RTLibType = Libcalls[2]; break;
    1588             :     case 4: RTLibType = Libcalls[3]; break;
    1589             :     case 8: RTLibType = Libcalls[4]; break;
    1590             :     case 16: RTLibType = Libcalls[5]; break;
    1591           0 :     }
    1592             :   } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
    1593             :     RTLibType = Libcalls[0];
    1594           0 :   } else {
    1595             :     // Can't use sized function, and there's no generic for this
    1596           0 :     // operation, so give up.
    1597             :     return false;
    1598             :   }
    1599           0 : 
    1600             :   // Build up the function call. There's two kinds. First, the sized
     1601           0 :   // Build up the function call. There are two kinds. First, the sized
    1602             :   // N=1,2,4,8,16):
    1603             :   //  iN    __atomic_load_N(iN *ptr, int ordering)
    1604           0 :   //  void  __atomic_store_N(iN *ptr, iN val, int ordering)
    1605           0 :   //  iN    __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
    1606           0 :   //  bool  __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
    1607           0 :   //                                    int success_order, int failure_order)
    1608           0 :   //
    1609           0 :   // Note that these functions can be used for non-integer atomic
    1610           0 :   // operations, the values just need to be bitcast to integers on the
    1611             :   // way in and out.
    1612           0 :   //
    1613             :   // And, then, the generic variants. They look like the following:
    1614             :   //  void  __atomic_load(size_t size, void *ptr, void *ret, int ordering)
    1615             :   //  void  __atomic_store(size_t size, void *ptr, void *val, int ordering)
    1616             :   //  void  __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
    1617           0 :   //                          int ordering)
    1618             :   //  bool  __atomic_compare_exchange(size_t size, void *ptr, void *expected,
    1619             :   //                                  void *desired, int success_order,
    1620             :   //                                  int failure_order)
    1621             :   //
    1622             :   // The different signatures are built up depending on the
    1623             :   // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
    1624             :   // variables.
    1625             : 
    1626             :   AllocaInst *AllocaCASExpected = nullptr;
    1627             :   Value *AllocaCASExpected_i8 = nullptr;
    1628             :   AllocaInst *AllocaValue = nullptr;
    1629             :   Value *AllocaValue_i8 = nullptr;
    1630             :   AllocaInst *AllocaResult = nullptr;
    1631             :   Value *AllocaResult_i8 = nullptr;
    1632             : 
    1633             :   Type *ResultTy;
    1634             :   SmallVector<Value *, 6> Args;
    1635             :   AttributeList Attr;
    1636             : 
    1637             :   // 'size' argument.
    1638             :   if (!UseSizedLibcall) {
    1639             :     // Note, getIntPtrType is assumed equivalent to size_t.
    1640             :     Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
    1641             :   }
    1642             : 
    1643             :   // 'ptr' argument.
    1644             :   Value *PtrVal =
    1645             :       Builder.CreateBitCast(PointerOperand, Type::getInt8PtrTy(Ctx));
    1646             :   Args.push_back(PtrVal);
    1647           0 : 
    1648             :   // 'expected' argument, if present.
    1649           0 :   if (CASExpected) {
    1650             :     AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
    1651           0 :     AllocaCASExpected->setAlignment(AllocaAlignment);
    1652             :     AllocaCASExpected_i8 =
    1653             :         Builder.CreateBitCast(AllocaCASExpected, Type::getInt8PtrTy(Ctx));
    1654             :     Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64);
    1655           0 :     Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
    1656             :     Args.push_back(AllocaCASExpected_i8);
    1657             :   }
    1658           0 : 
    1659             :   // 'val' argument ('desired' for cas), if present.
    1660           0 :   if (ValueOperand) {
    1661             :     if (UseSizedLibcall) {
    1662             :       Value *IntValue =
    1663             :           Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
    1664             :       Args.push_back(IntValue);
    1665           0 :     } else {
    1666           0 :       AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
    1667             :       AllocaValue->setAlignment(AllocaAlignment);
    1668             :       AllocaValue_i8 =
    1669           0 :           Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx));
    1670           0 :       Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64);
    1671           0 :       Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
    1672           0 :       Args.push_back(AllocaValue_i8);
    1673           0 :     }
    1674           0 :   }
    1675             : 
    1676           0 :   // 'ret' argument.
    1677             :   if (!CASExpected && HasResult && !UseSizedLibcall) {
    1678             :     AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
    1679             :     AllocaResult->setAlignment(AllocaAlignment);
    1680           0 :     AllocaResult_i8 =
    1681           0 :         Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx));
    1682             :     Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64);
    1683           0 :     Args.push_back(AllocaResult_i8);
    1684           0 :   }
    1685             : 
    1686           0 :   // 'ordering' ('success_order' for cas) argument.
    1687           0 :   Args.push_back(OrderingVal);
    1688           0 : 
    1689           0 :   // 'failure_order' argument, if present.
    1690           0 :   if (Ordering2Val)
    1691             :     Args.push_back(Ordering2Val);
    1692           0 : 
    1693             :   // Now, the return type.
    1694             :   if (CASExpected) {
    1695             :     ResultTy = Type::getInt1Ty(Ctx);
    1696             :     Attr = Attr.addAttribute(Ctx, AttributeList::ReturnIndex, Attribute::ZExt);
    1697           0 :   } else if (HasResult && UseSizedLibcall)
    1698           0 :     ResultTy = SizedIntTy;
    1699           0 :   else
    1700           0 :     ResultTy = Type::getVoidTy(Ctx);
    1701           0 : 
    1702           0 :   // Done with setting up arguments and return types, create the call:
    1703           0 :   SmallVector<Type *, 6> ArgTys;
    1704             :   for (Value *Arg : Args)
    1705             :     ArgTys.push_back(Arg->getType());
    1706             :   FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
    1707           0 :   Constant *LibcallFn =
    1708             :       M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
    1709             :   CallInst *Call = Builder.CreateCall(LibcallFn, Args);
    1710           0 :   Call->setAttributes(Attr);
    1711           0 :   Value *Result = Call;
    1712             : 
    1713             :   // And then, extract the results...
    1714           0 :   if (ValueOperand && !UseSizedLibcall)
    1715           0 :     Builder.CreateLifetimeEnd(AllocaValue_i8, SizeVal64);
    1716           0 : 
    1717           0 :   if (CASExpected) {
    1718             :     // The final result from the CAS is {load of 'expected' alloca, bool result
    1719             :     // from call}
    1720           0 :     Type *FinalResultTy = I->getType();
    1721             :     Value *V = UndefValue::get(FinalResultTy);
    1722             :     Value *ExpectedOut =
    1723             :         Builder.CreateAlignedLoad(AllocaCASExpected, AllocaAlignment);
    1724           0 :     Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64);
    1725           0 :     V = Builder.CreateInsertValue(V, ExpectedOut, 0);
    1726           0 :     V = Builder.CreateInsertValue(V, Result, 1);
    1727             :     I->replaceAllUsesWith(V);
    1728           0 :   } else if (HasResult) {
    1729           0 :     Value *V;
    1730             :     if (UseSizedLibcall)
    1731             :       V = Builder.CreateBitOrPointerCast(Result, I->getType());
    1732             :     else {
    1733             :       V = Builder.CreateAlignedLoad(AllocaResult, AllocaAlignment);
    1734           0 :       Builder.CreateLifetimeEnd(AllocaResult_i8, SizeVal64);
    1735           0 :     }
    1736             :     I->replaceAllUsesWith(V);
    1737           0 :   }
    1738             :   I->eraseFromParent();
    1739             :   return true;
    1740           0 : }
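                      : 
                      : // As an illustration (a sketch only, for a case where the sized
                      : // __atomic_compare_exchange_16 call cannot be used), an i128 cmpxchg ends up
                      : // roughly as:
                      : //     %expected.addr = alloca i128
                      : //     %desired.addr  = alloca i128
                      : //     store i128 %expected, i128* %expected.addr
                      : //     store i128 %desired, i128* %desired.addr
                      : //     %ok  = call zeroext i1 @__atomic_compare_exchange(i64 16, i8* %ptr.i8,
                      : //                i8* %expected.i8, i8* %desired.i8,
                      : //                i32 %success_order, i32 %failure_order)
                      : //     %old = load i128, i128* %expected.addr
                      : // with the { i128, i1 } result rebuilt from %old and %ok (lifetime markers
                      : // omitted here).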

Generated by: LCOV version 1.13