LLVM 23.0.0git
AtomicExpandPass.cpp
Go to the documentation of this file.
1//===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file contains a pass (at IR level) to replace atomic instructions with
10// __atomic_* library calls, or target specific instruction which implement the
11// same semantics in a way which better fits the target backend. This can
12// include the use of (intrinsic-based) load-linked/store-conditional loops,
13// AtomicCmpXchg, or type coercions.
14//
15//===----------------------------------------------------------------------===//
16
17#include "llvm/ADT/ArrayRef.h"
27#include "llvm/IR/Attributes.h"
28#include "llvm/IR/BasicBlock.h"
29#include "llvm/IR/Constant.h"
30#include "llvm/IR/Constants.h"
31#include "llvm/IR/DataLayout.h"
33#include "llvm/IR/Function.h"
34#include "llvm/IR/IRBuilder.h"
35#include "llvm/IR/Instruction.h"
37#include "llvm/IR/MDBuilder.h"
39#include "llvm/IR/Module.h"
41#include "llvm/IR/Type.h"
42#include "llvm/IR/User.h"
43#include "llvm/IR/Value.h"
45#include "llvm/Pass.h"
48#include "llvm/Support/Debug.h"
53#include <cassert>
54#include <cstdint>
55#include <iterator>
56
57using namespace llvm;
58
59#define DEBUG_TYPE "atomic-expand"
60
61namespace {
62
/// Implementation shared by the legacy- and new-pass-manager wrappers.
/// Walks a function and rewrites each atomic instruction according to the
/// expansion strategy the target requests via TargetLowering hooks.
class AtomicExpandImpl {
  // Target hooks deciding how (and whether) each atomic is expanded.
  const TargetLowering *TLI = nullptr;
  // Lowering info for the __atomic_* libcall fallback path.
  const LibcallLoweringInfo *LibcallLowering = nullptr;
  const DataLayout *DL = nullptr;

private:
  /// Callback type for emitting a cmpxchg instruction during RMW expansion.
  /// Parameters: (Builder, Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
  /// SSID, IsVolatile, /* OUT */ Success, /* OUT */ NewLoaded,
  /// MetadataSrc)
  // NOTE(review): the parameter list of this function_ref appears truncated
  // in this copy — restore the continuation from upstream.
  using CreateCmpXchgInstFun = function_ref<void(

  // Report an unexpandable atomic: emit a context error, RAUW with poison
  // so the IR stays well-formed, and delete the offending instruction.
  void handleFailure(Instruction &FailedInst, const Twine &Msg) const {
    LLVMContext &Ctx = FailedInst.getContext();

    // TODO: Do not use generic error type.
    Ctx.emitError(&FailedInst, Msg);

    if (!FailedInst.getType()->isVoidTy())
      FailedInst.replaceAllUsesWith(PoisonValue::get(FailedInst.getType()));
    FailedInst.eraseFromParent();
  }

  bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
  IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
  LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
  bool tryExpandAtomicLoad(LoadInst *LI);
  bool expandAtomicLoadToLL(LoadInst *LI);
  bool expandAtomicLoadToCmpXchg(LoadInst *LI);
  StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
  bool tryExpandAtomicStore(StoreInst *SI);
  void expandAtomicStoreToXChg(StoreInst *SI);
  bool tryExpandAtomicRMW(AtomicRMWInst *AI);
  AtomicRMWInst *convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI);
  Value *
  insertRMWLLSCLoop(IRBuilderBase &Builder, Type *ResultTy, Value *Addr,
                    Align AddrAlign, AtomicOrdering MemOpOrder,
                    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
  void expandAtomicOpToLLSC(
      Instruction *I, Type *ResultTy, Value *Addr, Align AddrAlign,
      AtomicOrdering MemOpOrder,
      function_ref<Value *(IRBuilderBase &, Value *)> PerformOp);
  // NOTE(review): this declaration's parameter list appears truncated in
  // this copy — restore the continuation from upstream.
  void expandPartwordAtomicRMW(
  AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
  bool expandPartwordCmpXchg(AtomicCmpXchgInst *I);
  void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
  void expandAtomicCmpXchgToMaskedIntrinsic(AtomicCmpXchgInst *CI);

  AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
  static Value *insertRMWCmpXchgLoop(
      IRBuilderBase &Builder, Type *ResultType, Value *Addr, Align AddrAlign,
      AtomicOrdering MemOpOrder, SyncScope::ID SSID,
      function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
      CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc);
  bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);

  bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
  bool isIdempotentRMW(AtomicRMWInst *RMWI);
  bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);

  // __atomic_* libcall fallback for operations the target cannot lower.
  bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, Align Alignment,
                               Value *PointerOperand, Value *ValueOperand,
                               Value *CASExpected, AtomicOrdering Ordering,
                               AtomicOrdering Ordering2,
                               ArrayRef<RTLIB::Libcall> Libcalls);
  void expandAtomicLoadToLibcall(LoadInst *LI);
  void expandAtomicStoreToLibcall(StoreInst *LI);
  void expandAtomicRMWToLibcall(AtomicRMWInst *I);
  void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);

  bool expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
                                CreateCmpXchgInstFun CreateCmpXchg);

  // Expand a single instruction; returns true if anything changed.
  bool processAtomicInstr(Instruction *I);

public:
  // Entry point: expand all atomics in F for the given target machine.
  bool run(Function &F,
           const LibcallLoweringModuleAnalysisResult &LibcallResult,
           const TargetMachine *TM);
};
146
/// Legacy pass-manager wrapper around AtomicExpandImpl.
class AtomicExpandLegacy : public FunctionPass {
public:
  static char ID; // Pass identification, replacement for typeid

  AtomicExpandLegacy() : FunctionPass(ID) {}

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // NOTE(review): the body appears truncated in this copy; runOnFunction
    // calls getAnalysis<LibcallLoweringInfoWrapper>(), so an addRequired of
    // that wrapper presumably belongs here — restore from upstream.
  }

  bool runOnFunction(Function &F) override;
};
160
161// IRBuilder to be used for replacement atomic instructions.
// IRBuilder to be used for replacement atomic instructions.
struct ReplacementIRBuilder
    : IRBuilder<InstSimplifyFolder, IRBuilderCallbackInserter> {
  // MMRA metadata taken from the instruction being replaced; re-attached to
  // every instruction this builder creates (via the insertion callback).
  MDNode *MMRAMD = nullptr;

  // Preserves the DebugLoc from I, and preserves still valid metadata.
  // Enable StrictFP builder mode when appropriate.
  // NOTE(review): the initializer list appears truncated in this copy — the
  // IRBuilderCallbackInserter(...) wrapper around the lambda is missing.
  explicit ReplacementIRBuilder(Instruction *I, const DataLayout &DL)
      : IRBuilder(I->getContext(), InstSimplifyFolder(DL),
                  [this](Instruction *I) { addMMRAMD(I); })) {
    SetInsertPoint(I);
    this->CollectMetadataToCopy(I, {LLVMContext::MD_pcsections});
    if (BB->getParent()->getAttributes().hasFnAttr(Attribute::StrictFP))
      this->setIsFPConstrained(true);

    MMRAMD = I->getMetadata(LLVMContext::MD_mmra);
  }

  // Insertion callback: propagate the captured MMRA metadata.
  // NOTE(review): a guard line (presumably `if (MMRAMD)`) appears to be
  // missing here in this copy — confirm against upstream.
  void addMMRAMD(Instruction *I) {
    I->setMetadata(LLVMContext::MD_mmra, MMRAMD);
  }
};
185
186} // end anonymous namespace
187
188char AtomicExpandLegacy::ID = 0;
189
190char &llvm::AtomicExpandID = AtomicExpandLegacy::ID;
191
193 "Expand Atomic instructions", false, false)
196INITIALIZE_PASS_END(AtomicExpandLegacy, DEBUG_TYPE,
197 "Expand Atomic instructions", false, false)
198
199// Helper functions to retrieve the size of atomic instructions.
200static unsigned getAtomicOpSize(LoadInst *LI) {
201 const DataLayout &DL = LI->getDataLayout();
202 return DL.getTypeStoreSize(LI->getType());
203}
204
205static unsigned getAtomicOpSize(StoreInst *SI) {
206 const DataLayout &DL = SI->getDataLayout();
207 return DL.getTypeStoreSize(SI->getValueOperand()->getType());
208}
209
210static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
211 const DataLayout &DL = RMWI->getDataLayout();
212 return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
213}
214
215static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
216 const DataLayout &DL = CASI->getDataLayout();
217 return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
218}
219
220/// Copy metadata that's safe to preserve when widening atomics.
222 const Instruction &Source) {
224 Source.getAllMetadata(MD);
225 LLVMContext &Ctx = Dest.getContext();
226 MDBuilder MDB(Ctx);
227
228 for (auto [ID, N] : MD) {
229 switch (ID) {
230 case LLVMContext::MD_dbg:
231 case LLVMContext::MD_tbaa:
232 case LLVMContext::MD_tbaa_struct:
233 case LLVMContext::MD_alias_scope:
234 case LLVMContext::MD_noalias:
235 case LLVMContext::MD_noalias_addrspace:
236 case LLVMContext::MD_access_group:
237 case LLVMContext::MD_mmra:
238 Dest.setMetadata(ID, N);
239 break;
240 default:
241 if (ID == Ctx.getMDKindID("amdgpu.no.remote.memory"))
242 Dest.setMetadata(ID, N);
243 else if (ID == Ctx.getMDKindID("amdgpu.no.fine.grained.memory"))
244 Dest.setMetadata(ID, N);
245
246 // Losing amdgpu.ignore.denormal.mode, but it doesn't matter for current
247 // uses.
248 break;
249 }
250 }
251}
252
253// Determine if a particular atomic operation has a supported size,
254// and is of appropriate alignment, to be passed through for target
255// lowering. (Versus turning into a __atomic libcall)
256template <typename Inst>
257static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
258 unsigned Size = getAtomicOpSize(I);
259 Align Alignment = I->getAlign();
260 return Alignment >= Size &&
262}
263
/// Rewrite a single atomic instruction: libcall fallback for unsupported
/// size/alignment, optional cast-to-integer canonicalization, explicit fence
/// insertion when the target asks for it, and finally dispatch to the
/// per-opcode expansion routine. Returns true if anything changed.
bool AtomicExpandImpl::processAtomicInstr(Instruction *I) {
  // Exactly one of these is non-null for an atomic instruction.
  auto *LI = dyn_cast<LoadInst>(I);
  auto *SI = dyn_cast<StoreInst>(I);
  auto *RMWI = dyn_cast<AtomicRMWInst>(I);
  auto *CASI = dyn_cast<AtomicCmpXchgInst>(I);

  bool MadeChange = false;

  // If the Size/Alignment is not supported, replace with a libcall.
  if (LI) {
    if (!LI->isAtomic())
      return false;

    if (!atomicSizeSupported(TLI, LI)) {
      expandAtomicLoadToLibcall(LI);
      return true;
    }

    // Canonicalize FP/vector/pointer atomics to integer if requested;
    // note I is rebound so the fence logic below sees the new instruction.
    if (TLI->shouldCastAtomicLoadInIR(LI) ==
        TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = LI = convertAtomicLoadToIntegerType(LI);
      MadeChange = true;
    }
  } else if (SI) {
    if (!SI->isAtomic())
      return false;

    if (!atomicSizeSupported(TLI, SI)) {
      expandAtomicStoreToLibcall(SI);
      return true;
    }

    if (TLI->shouldCastAtomicStoreInIR(SI) ==
        TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = SI = convertAtomicStoreToIntegerType(SI);
      MadeChange = true;
    }
  } else if (RMWI) {
    if (!atomicSizeSupported(TLI, RMWI)) {
      expandAtomicRMWToLibcall(RMWI);
      return true;
    }

    if (TLI->shouldCastAtomicRMWIInIR(RMWI) ==
        TargetLoweringBase::AtomicExpansionKind::CastToInteger) {
      I = RMWI = convertAtomicXchgToIntegerType(RMWI);
      MadeChange = true;
    }
  } else if (CASI) {
    if (!atomicSizeSupported(TLI, CASI)) {
      expandAtomicCASToLibcall(CASI);
      return true;
    }

    // TODO: when we're ready to make the change at the IR level, we can
    // extend convertCmpXchgToInteger for floating point too.
    if (CASI->getCompareOperand()->getType()->isPointerTy()) {
      // TODO: add a TLI hook to control this so that each target can
      // convert to lowering the original type one at a time.
      I = CASI = convertCmpXchgToIntegerType(CASI);
      MadeChange = true;
    }
  } else
    return false;

  if (TLI->shouldInsertFencesForAtomic(I)) {
    // Split the ordering off the memory operation onto explicit fences,
    // weakening the operation itself to monotonic.
    auto FenceOrdering = AtomicOrdering::Monotonic;
    if (LI && isAcquireOrStronger(LI->getOrdering())) {
      FenceOrdering = LI->getOrdering();
      LI->setOrdering(AtomicOrdering::Monotonic);
    } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
      FenceOrdering = SI->getOrdering();
      SI->setOrdering(AtomicOrdering::Monotonic);
    } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
                        isAcquireOrStronger(RMWI->getOrdering()))) {
      FenceOrdering = RMWI->getOrdering();
      RMWI->setOrdering(TLI->atomicOperationOrderAfterFenceSplit(RMWI));
    // NOTE(review): a line is missing here in this copy — the condition head
    // (presumably `TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==`) was lost in
    // extraction; restore from upstream.
    } else if (CASI &&
                   TargetLoweringBase::AtomicExpansionKind::None &&
               (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
                isAcquireOrStronger(CASI->getSuccessOrdering()) ||
                isAcquireOrStronger(CASI->getFailureOrdering()))) {
      // If a compare and swap is lowered to LL/SC, we can do smarter fence
      // insertion, with a stronger one on the success path than on the
      // failure path. As a result, fence insertion is directly done by
      // expandAtomicCmpXchg in that case.
      FenceOrdering = CASI->getMergedOrdering();
      auto CASOrdering = TLI->atomicOperationOrderAfterFenceSplit(CASI);

      CASI->setSuccessOrdering(CASOrdering);
      CASI->setFailureOrdering(CASOrdering);
    }

    if (FenceOrdering != AtomicOrdering::Monotonic) {
      MadeChange |= bracketInstWithFences(I, FenceOrdering);
    }
  // NOTE(review): the head of this `else if` was lost in extraction
  // (presumably a shouldInsertTrailingFenceForAtomicStore(I) check);
  // restore from upstream.
      !(CASI && TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
                    TargetLoweringBase::AtomicExpansionKind::LLSC)) {
    // CmpXchg LLSC is handled in expandAtomicCmpXchg().
    IRBuilder Builder(I);
    if (auto TrailingFence = TLI->emitTrailingFence(
            Builder, I, AtomicOrdering::SequentiallyConsistent)) {
      TrailingFence->moveAfter(I);
      MadeChange = true;
    }
  }

  // Ordering handled; now lower the operation itself.
  if (LI)
    MadeChange |= tryExpandAtomicLoad(LI);
  else if (SI)
    MadeChange |= tryExpandAtomicStore(SI);
  else if (RMWI) {
    // There are two different ways of expanding RMW instructions:
    // - into a load if it is idempotent
    // - into a Cmpxchg/LL-SC loop otherwise
    // we try them in that order.

    if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
      MadeChange = true;

    } else {
      MadeChange |= tryExpandAtomicRMW(RMWI);
    }
  } else if (CASI)
    MadeChange |= tryExpandAtomicCmpXchg(CASI);

  return MadeChange;
}
394
395bool AtomicExpandImpl::run(
396 Function &F, const LibcallLoweringModuleAnalysisResult &LibcallResult,
397 const TargetMachine *TM) {
398 const auto *Subtarget = TM->getSubtargetImpl(F);
399 if (!Subtarget->enableAtomicExpand())
400 return false;
401 TLI = Subtarget->getTargetLowering();
402 LibcallLowering = &LibcallResult.getLibcallLowering(*Subtarget);
403 DL = &F.getDataLayout();
404
405 bool MadeChange = false;
406
407 for (Function::iterator BBI = F.begin(), BBE = F.end(); BBI != BBE; ++BBI) {
408 BasicBlock *BB = &*BBI;
409
411
412 for (BasicBlock::reverse_iterator I = BB->rbegin(), E = BB->rend(); I != E;
413 I = Next) {
414 Instruction &Inst = *I;
415 Next = std::next(I);
416
417 if (processAtomicInstr(&Inst)) {
418 MadeChange = true;
419
420 // New blocks may have been inserted.
421 BBE = F.end();
422 }
423 }
424 }
425
426 return MadeChange;
427}
428
429bool AtomicExpandLegacy::runOnFunction(Function &F) {
430
431 auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
432 if (!TPC)
433 return false;
434 auto *TM = &TPC->getTM<TargetMachine>();
435
436 const LibcallLoweringModuleAnalysisResult &LibcallResult =
437 getAnalysis<LibcallLoweringInfoWrapper>().getResult(*F.getParent());
438 AtomicExpandImpl AE;
439 return AE.run(F, LibcallResult, TM);
440}
441
443 return new AtomicExpandLegacy();
444}
445
448 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
449
450 const LibcallLoweringModuleAnalysisResult *LibcallResult =
451 MAMProxy.getCachedResult<LibcallLoweringModuleAnalysis>(*F.getParent());
452
453 if (!LibcallResult) {
454 F.getContext().emitError("'" + LibcallLoweringModuleAnalysis::name() +
455 "' analysis required");
456 return PreservedAnalyses::all();
457 }
458
459 AtomicExpandImpl AE;
460
461 bool Changed = AE.run(F, *LibcallResult, TM);
462 if (!Changed)
463 return PreservedAnalyses::all();
464
466}
467
468bool AtomicExpandImpl::bracketInstWithFences(Instruction *I,
469 AtomicOrdering Order) {
470 ReplacementIRBuilder Builder(I, *DL);
471
472 auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
473
474 auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
475 // We have a guard here because not every atomic operation generates a
476 // trailing fence.
477 if (TrailingFence)
478 TrailingFence->moveAfter(I);
479
480 return (LeadingFence || TrailingFence);
481}
482
483/// Get the iX type with the same bitwidth as T.
485AtomicExpandImpl::getCorrespondingIntegerType(Type *T, const DataLayout &DL) {
486 EVT VT = TLI->getMemValueType(DL, T);
487 unsigned BitWidth = VT.getStoreSizeInBits();
488 assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
489 return IntegerType::get(T->getContext(), BitWidth);
490}
491
492/// Convert an atomic load of a non-integral type to an integer load of the
493/// equivalent bitwidth. See the function comment on
494/// convertAtomicStoreToIntegerType for background.
495LoadInst *AtomicExpandImpl::convertAtomicLoadToIntegerType(LoadInst *LI) {
496 auto *M = LI->getModule();
497 Type *NewTy = getCorrespondingIntegerType(LI->getType(), M->getDataLayout());
498
499 ReplacementIRBuilder Builder(LI, *DL);
500
501 Value *Addr = LI->getPointerOperand();
502
503 auto *NewLI = Builder.CreateLoad(NewTy, Addr);
504 NewLI->setAlignment(LI->getAlign());
505 NewLI->setVolatile(LI->isVolatile());
506 NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
507 LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
508
509 Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
510 LI->replaceAllUsesWith(NewVal);
511 LI->eraseFromParent();
512 return NewLI;
513}
514
/// Convert an atomicrmw xchg of a non-integral type (pointer/FP) into an
/// xchg of the same-width integer type, casting the operand in and the
/// result back out. Returns the new instruction.
AtomicRMWInst *
AtomicExpandImpl::convertAtomicXchgToIntegerType(AtomicRMWInst *RMWI) {
  // NOTE(review): a line is missing here in this copy (likely an assert that
  // the operation is Xchg) — confirm against upstream.

  auto *M = RMWI->getModule();
  Type *NewTy =
      getCorrespondingIntegerType(RMWI->getType(), M->getDataLayout());

  ReplacementIRBuilder Builder(RMWI, *DL);

  Value *Addr = RMWI->getPointerOperand();
  Value *Val = RMWI->getValOperand();
  // Pointers need ptrtoint; FP/vector values bitcast losslessly.
  Value *NewVal = Val->getType()->isPointerTy()
                      ? Builder.CreatePtrToInt(Val, NewTy)
                      : Builder.CreateBitCast(Val, NewTy);

  auto *NewRMWI = Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, Addr, NewVal,
                                          RMWI->getAlign(), RMWI->getOrdering(),
                                          RMWI->getSyncScopeID());
  NewRMWI->setVolatile(RMWI->isVolatile());
  copyMetadataForAtomic(*NewRMWI, *RMWI);
  LLVM_DEBUG(dbgs() << "Replaced " << *RMWI << " with " << *NewRMWI << "\n");

  // Cast the result back to the original type for existing users.
  Value *NewRVal = RMWI->getType()->isPointerTy()
                       ? Builder.CreateIntToPtr(NewRMWI, RMWI->getType())
                       : Builder.CreateBitCast(NewRMWI, RMWI->getType());
  RMWI->replaceAllUsesWith(NewRVal);
  RMWI->eraseFromParent();
  return NewRMWI;
}
545
546bool AtomicExpandImpl::tryExpandAtomicLoad(LoadInst *LI) {
547 switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
548 case TargetLoweringBase::AtomicExpansionKind::None:
549 return false;
550 case TargetLoweringBase::AtomicExpansionKind::LLSC:
551 expandAtomicOpToLLSC(
552 LI, LI->getType(), LI->getPointerOperand(), LI->getAlign(),
553 LI->getOrdering(),
554 [](IRBuilderBase &Builder, Value *Loaded) { return Loaded; });
555 return true;
556 case TargetLoweringBase::AtomicExpansionKind::LLOnly:
557 return expandAtomicLoadToLL(LI);
558 case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
559 return expandAtomicLoadToCmpXchg(LI);
560 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
561 LI->setAtomic(AtomicOrdering::NotAtomic);
562 return true;
563 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
564 TLI->emitExpandAtomicLoad(LI);
565 return true;
566 default:
567 llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
568 }
569}
570
571bool AtomicExpandImpl::tryExpandAtomicStore(StoreInst *SI) {
572 switch (TLI->shouldExpandAtomicStoreInIR(SI)) {
573 case TargetLoweringBase::AtomicExpansionKind::None:
574 return false;
575 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
576 TLI->emitExpandAtomicStore(SI);
577 return true;
578 case TargetLoweringBase::AtomicExpansionKind::Expand:
579 expandAtomicStoreToXChg(SI);
580 return true;
581 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
582 SI->setAtomic(AtomicOrdering::NotAtomic);
583 return true;
584 default:
585 llvm_unreachable("Unhandled case in tryExpandAtomicStore");
586 }
587}
588
589bool AtomicExpandImpl::expandAtomicLoadToLL(LoadInst *LI) {
590 ReplacementIRBuilder Builder(LI, *DL);
591
592 // On some architectures, load-linked instructions are atomic for larger
593 // sizes than normal loads. For example, the only 64-bit load guaranteed
594 // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
595 Value *Val = TLI->emitLoadLinked(Builder, LI->getType(),
596 LI->getPointerOperand(), LI->getOrdering());
598
599 LI->replaceAllUsesWith(Val);
600 LI->eraseFromParent();
601
602 return true;
603}
604
605bool AtomicExpandImpl::expandAtomicLoadToCmpXchg(LoadInst *LI) {
606 ReplacementIRBuilder Builder(LI, *DL);
607 AtomicOrdering Order = LI->getOrdering();
608 if (Order == AtomicOrdering::Unordered)
609 Order = AtomicOrdering::Monotonic;
610
611 Value *Addr = LI->getPointerOperand();
612 Type *Ty = LI->getType();
613 Constant *DummyVal = Constant::getNullValue(Ty);
614
615 Value *Pair = Builder.CreateAtomicCmpXchg(
616 Addr, DummyVal, DummyVal, LI->getAlign(), Order,
618 Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
619
620 LI->replaceAllUsesWith(Loaded);
621 LI->eraseFromParent();
622
623 return true;
624}
625
626/// Convert an atomic store of a non-integral type to an integer store of the
627/// equivalent bitwidth. We used to not support floating point or vector
628/// atomics in the IR at all. The backends learned to deal with the bitcast
629/// idiom because that was the only way of expressing the notion of a atomic
630/// float or vector store. The long term plan is to teach each backend to
631/// instruction select from the original atomic store, but as a migration
632/// mechanism, we convert back to the old format which the backends understand.
633/// Each backend will need individual work to recognize the new format.
634StoreInst *AtomicExpandImpl::convertAtomicStoreToIntegerType(StoreInst *SI) {
635 ReplacementIRBuilder Builder(SI, *DL);
636 auto *M = SI->getModule();
637 Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
638 M->getDataLayout());
639 Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
640
641 Value *Addr = SI->getPointerOperand();
642
643 StoreInst *NewSI = Builder.CreateStore(NewVal, Addr);
644 NewSI->setAlignment(SI->getAlign());
645 NewSI->setVolatile(SI->isVolatile());
646 NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
647 LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
648 SI->eraseFromParent();
649 return NewSI;
650}
651
652void AtomicExpandImpl::expandAtomicStoreToXChg(StoreInst *SI) {
653 // This function is only called on atomic stores that are too large to be
654 // atomic if implemented as a native store. So we replace them by an
655 // atomic swap, that can be implemented for example as a ldrex/strex on ARM
656 // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
657 // It is the responsibility of the target to only signal expansion via
658 // shouldExpandAtomicRMW in cases where this is required and possible.
659 ReplacementIRBuilder Builder(SI, *DL);
660 AtomicOrdering Ordering = SI->getOrdering();
661 assert(Ordering != AtomicOrdering::NotAtomic);
662 AtomicOrdering RMWOrdering = Ordering == AtomicOrdering::Unordered
663 ? AtomicOrdering::Monotonic
664 : Ordering;
665 AtomicRMWInst *AI = Builder.CreateAtomicRMW(
666 AtomicRMWInst::Xchg, SI->getPointerOperand(), SI->getValueOperand(),
667 SI->getAlign(), RMWOrdering);
668 SI->eraseFromParent();
669
670 // Now we have an appropriate swap instruction, lower it as usual.
671 tryExpandAtomicRMW(AI);
672}
673
674static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr,
675 Value *Loaded, Value *NewVal, Align AddrAlign,
676 AtomicOrdering MemOpOrder, SyncScope::ID SSID,
677 Value *&Success, Value *&NewLoaded,
678 Instruction *MetadataSrc) {
679 Type *OrigTy = NewVal->getType();
680
681 // This code can go away when cmpxchg supports FP and vector types.
682 assert(!OrigTy->isPointerTy());
683 bool NeedBitcast = OrigTy->isFloatingPointTy() || OrigTy->isVectorTy();
684 if (NeedBitcast) {
685 IntegerType *IntTy = Builder.getIntNTy(OrigTy->getPrimitiveSizeInBits());
686 NewVal = Builder.CreateBitCast(NewVal, IntTy);
687 Loaded = Builder.CreateBitCast(Loaded, IntTy);
688 }
689
690 AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
691 Addr, Loaded, NewVal, AddrAlign, MemOpOrder,
693 if (MetadataSrc)
694 copyMetadataForAtomic(*Pair, *MetadataSrc);
695
696 Success = Builder.CreateExtractValue(Pair, 1, "success");
697 NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
698
699 if (NeedBitcast)
700 NewLoaded = Builder.CreateBitCast(NewLoaded, OrigTy);
701}
702
703bool AtomicExpandImpl::tryExpandAtomicRMW(AtomicRMWInst *AI) {
704 LLVMContext &Ctx = AI->getModule()->getContext();
705 TargetLowering::AtomicExpansionKind Kind = TLI->shouldExpandAtomicRMWInIR(AI);
706 switch (Kind) {
707 case TargetLoweringBase::AtomicExpansionKind::None:
708 return false;
709 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
710 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
711 unsigned ValueSize = getAtomicOpSize(AI);
712 if (ValueSize < MinCASSize) {
713 expandPartwordAtomicRMW(AI,
714 TargetLoweringBase::AtomicExpansionKind::LLSC);
715 } else {
716 auto PerformOp = [&](IRBuilderBase &Builder, Value *Loaded) {
717 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
718 AI->getValOperand());
719 };
720 expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
721 AI->getAlign(), AI->getOrdering(), PerformOp);
722 }
723 return true;
724 }
725 case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
726 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
727 unsigned ValueSize = getAtomicOpSize(AI);
728 if (ValueSize < MinCASSize) {
729 expandPartwordAtomicRMW(AI,
730 TargetLoweringBase::AtomicExpansionKind::CmpXChg);
731 } else {
733 Ctx.getSyncScopeNames(SSNs);
734 auto MemScope = SSNs[AI->getSyncScopeID()].empty()
735 ? "system"
736 : SSNs[AI->getSyncScopeID()];
737 OptimizationRemarkEmitter ORE(AI->getFunction());
738 ORE.emit([&]() {
739 return OptimizationRemark(DEBUG_TYPE, "Passed", AI)
740 << "A compare and swap loop was generated for an atomic "
741 << AI->getOperationName(AI->getOperation()) << " operation at "
742 << MemScope << " memory scope";
743 });
744 expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
745 }
746 return true;
747 }
748 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
749 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
750 unsigned ValueSize = getAtomicOpSize(AI);
751 if (ValueSize < MinCASSize) {
753 // Widen And/Or/Xor and give the target another chance at expanding it.
756 tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
757 return true;
758 }
759 }
760 expandAtomicRMWToMaskedIntrinsic(AI);
761 return true;
762 }
763 case TargetLoweringBase::AtomicExpansionKind::BitTestIntrinsic: {
765 return true;
766 }
767 case TargetLoweringBase::AtomicExpansionKind::CmpArithIntrinsic: {
769 return true;
770 }
771 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
772 return lowerAtomicRMWInst(AI);
773 case TargetLoweringBase::AtomicExpansionKind::CustomExpand:
774 TLI->emitExpandAtomicRMW(AI);
775 return true;
776 default:
777 llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
778 }
779}
780
781namespace {
782
/// Bundle of values produced by createMaskInstrs describing how a sub-word
/// atomic access maps onto a full aligned memory word.
struct PartwordMaskValues {
  // These three fields are guaranteed to be set by createMaskInstrs.
  Type *WordType = nullptr;      // the wide (word-sized) integer type
  Type *ValueType = nullptr;     // the original accessed type
  Type *IntValueType = nullptr;  // ValueType as a same-width integer
  Value *AlignedAddr = nullptr;  // address rounded down to word alignment
  Align AlignedAddrAlignment;
  // The remaining fields can be null.
  Value *ShiftAmt = nullptr;     // bit offset of the value within the word
  Value *Mask = nullptr;         // bits of the word belonging to the value
  Value *Inv_Mask = nullptr;     // complement of Mask
};
795
// Debug-dump helper for PartwordMaskValues; each field prints its IR value
// or "nullptr" when unset. Kept [[maybe_unused]] since it is only referenced
// from debug code.
[[maybe_unused]]
raw_ostream &operator<<(raw_ostream &O, const PartwordMaskValues &PMV) {
  // Print an IR object or a placeholder, followed by a newline.
  auto PrintObj = [&O](auto *V) {
    if (V)
      O << *V;
    else
      O << "nullptr";
    O << '\n';
  };
  O << "PartwordMaskValues {\n";
  O << " WordType: ";
  PrintObj(PMV.WordType);
  O << " ValueType: ";
  PrintObj(PMV.ValueType);
  O << " AlignedAddr: ";
  PrintObj(PMV.AlignedAddr);
  O << " AlignedAddrAlignment: " << PMV.AlignedAddrAlignment.value() << '\n';
  O << " ShiftAmt: ";
  PrintObj(PMV.ShiftAmt);
  O << " Mask: ";
  PrintObj(PMV.Mask);
  O << " Inv_Mask: ";
  PrintObj(PMV.Inv_Mask);
  O << "}\n";
  return O;
}
822
823} // end anonymous namespace
824
825/// This is a helper function which builds instructions to provide
826/// values necessary for partword atomic operations. It takes an
827/// incoming address, Addr, and ValueType, and constructs the address,
828/// shift-amounts and masks needed to work with a larger value of size
829/// WordSize.
830///
831/// AlignedAddr: Addr rounded down to a multiple of WordSize
832///
833/// ShiftAmt: Number of bits to right-shift a WordSize value loaded
834/// from AlignAddr for it to have the same value as if
835/// ValueType was loaded from Addr.
836///
837/// Mask: Value to mask with the value loaded from AlignAddr to
838/// include only the part that would've been loaded from Addr.
839///
840/// Inv_Mask: The inverse of Mask.
841static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder,
843 Value *Addr, Align AddrAlign,
844 unsigned MinWordSize) {
845 PartwordMaskValues PMV;
846
847 Module *M = I->getModule();
848 LLVMContext &Ctx = M->getContext();
849 const DataLayout &DL = M->getDataLayout();
850 unsigned ValueSize = DL.getTypeStoreSize(ValueType);
851
852 PMV.ValueType = PMV.IntValueType = ValueType;
853 if (PMV.ValueType->isFloatingPointTy() || PMV.ValueType->isVectorTy())
854 PMV.IntValueType =
855 Type::getIntNTy(Ctx, ValueType->getPrimitiveSizeInBits());
856
857 PMV.WordType = MinWordSize > ValueSize ? Type::getIntNTy(Ctx, MinWordSize * 8)
858 : ValueType;
859 if (PMV.ValueType == PMV.WordType) {
860 PMV.AlignedAddr = Addr;
861 PMV.AlignedAddrAlignment = AddrAlign;
862 PMV.ShiftAmt = ConstantInt::get(PMV.ValueType, 0);
863 PMV.Mask = ConstantInt::get(PMV.ValueType, ~0, /*isSigned*/ true);
864 return PMV;
865 }
866
867 PMV.AlignedAddrAlignment = Align(MinWordSize);
868
869 assert(ValueSize < MinWordSize);
870
871 PointerType *PtrTy = cast<PointerType>(Addr->getType());
872 IntegerType *IntTy = DL.getIndexType(Ctx, PtrTy->getAddressSpace());
873 Value *PtrLSB;
874
875 if (AddrAlign < MinWordSize) {
876 PMV.AlignedAddr = Builder.CreateIntrinsic(
877 Intrinsic::ptrmask, {PtrTy, IntTy},
878 {Addr, ConstantInt::getSigned(IntTy, ~(uint64_t)(MinWordSize - 1))},
879 nullptr, "AlignedAddr");
880
881 Value *AddrInt = Builder.CreatePtrToInt(Addr, IntTy);
882 PtrLSB = Builder.CreateAnd(AddrInt, MinWordSize - 1, "PtrLSB");
883 } else {
884 // If the alignment is high enough, the LSB are known 0.
885 PMV.AlignedAddr = Addr;
886 PtrLSB = ConstantInt::getNullValue(IntTy);
887 }
888
889 if (DL.isLittleEndian()) {
890 // turn bytes into bits
891 PMV.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
892 } else {
893 // turn bytes into bits, and count from the other side.
894 PMV.ShiftAmt = Builder.CreateShl(
895 Builder.CreateXor(PtrLSB, MinWordSize - ValueSize), 3);
896 }
897
898 PMV.ShiftAmt = Builder.CreateTrunc(PMV.ShiftAmt, PMV.WordType, "ShiftAmt");
899 PMV.Mask = Builder.CreateShl(
900 ConstantInt::get(PMV.WordType, (1 << (ValueSize * 8)) - 1), PMV.ShiftAmt,
901 "Mask");
902
903 PMV.Inv_Mask = Builder.CreateNot(PMV.Mask, "Inv_Mask");
904
905 return PMV;
906}
907
908static Value *extractMaskedValue(IRBuilderBase &Builder, Value *WideWord,
909 const PartwordMaskValues &PMV) {
910 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
911 if (PMV.WordType == PMV.ValueType)
912 return WideWord;
913
914 Value *Shift = Builder.CreateLShr(WideWord, PMV.ShiftAmt, "shifted");
915 Value *Trunc = Builder.CreateTrunc(Shift, PMV.IntValueType, "extracted");
916 return Builder.CreateBitCast(Trunc, PMV.ValueType);
917}
918
919static Value *insertMaskedValue(IRBuilderBase &Builder, Value *WideWord,
920 Value *Updated, const PartwordMaskValues &PMV) {
921 assert(WideWord->getType() == PMV.WordType && "Widened type mismatch");
922 assert(Updated->getType() == PMV.ValueType && "Value type mismatch");
923 if (PMV.WordType == PMV.ValueType)
924 return Updated;
925
926 Updated = Builder.CreateBitCast(Updated, PMV.IntValueType);
927
928 Value *ZExt = Builder.CreateZExt(Updated, PMV.WordType, "extended");
929 Value *Shift =
930 Builder.CreateShl(ZExt, PMV.ShiftAmt, "shifted", /*HasNUW*/ true);
931 Value *And = Builder.CreateAnd(WideWord, PMV.Inv_Mask, "unmasked");
932 Value *Or = Builder.CreateOr(And, Shift, "inserted");
933 return Or;
934}
935
/// Emit IR to implement a masked version of a given atomicrmw
/// operation. (That is, only the bits under the Mask should be
/// affected by the operation)
// NOTE(review): the head of this definition (its name, first parameter, and
// several case labels inside the switch) was lost in extraction — restore
// from upstream before building.
                                    IRBuilderBase &Builder, Value *Loaded,
                                    Value *Shifted_Inc, Value *Inc,
                                    const PartwordMaskValues &PMV) {
  // TODO: update to use
  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
  // to merge bits from two values without requiring PMV.Inv_Mask.
  switch (Op) {
  case AtomicRMWInst::Xchg: {
    // Xchg simply replaces the masked field with the (pre-shifted) operand.
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
    return FinalVal;
  }
  // NOTE(review): the Or/Xor/And case labels were lost in extraction.
    llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
  // NOTE(review): additional case labels (arithmetic ops) were lost here.
  case AtomicRMWInst::Nand: {
    // The other arithmetic ops need to be masked into place.
    Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded, Shifted_Inc);
    Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
    Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
    Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
    return FinalVal;
  }
  // NOTE(review): the run of case labels for the remaining operations
  // (min/max, FP ops, etc.) was lost in extraction.
    // Finally, other ops will operate on the full value, so truncate down to
    // the original size, and expand out again after doing the
    // operation. Bitcasts will be inserted for FP values.
    Value *Loaded_Extract = extractMaskedValue(Builder, Loaded, PMV);
    Value *NewVal = buildAtomicRMWValue(Op, Builder, Loaded_Extract, Inc);
    Value *FinalVal = insertMaskedValue(Builder, Loaded, NewVal, PMV);
    return FinalVal;
  }
  default:
    llvm_unreachable("Unknown atomic op");
  }
}
994
/// Expand a sub-word atomicrmw operation into an appropriate
/// word-sized operation.
///
/// It will create an LL/SC or cmpxchg loop, as appropriate, the same
/// way as a typical atomicrmw expansion. The only difference here is
/// that the operation inside of the loop may operate upon only a
/// part of the value.
void AtomicExpandImpl::expandPartwordAtomicRMW(
    AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
  // Widen And/Or/Xor and give the target another chance at expanding it.
    tryExpandAtomicRMW(widenPartwordAtomicRMW(AI));
    return;
  }
  AtomicOrdering MemOpOrder = AI->getOrdering();
  SyncScope::ID SSID = AI->getSyncScopeID();

  ReplacementIRBuilder Builder(AI, *DL);

  // Compute the aligned word address plus the shift amount and masks that
  // locate this sub-word field within it.
  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  Value *ValOperand_Shifted = nullptr;
    // Move the operand into the field's bit position within the wide word.
    Value *ValOp = Builder.CreateBitCast(AI->getValOperand(), PMV.IntValueType);
    ValOperand_Shifted =
        Builder.CreateShl(Builder.CreateZExt(ValOp, PMV.WordType), PMV.ShiftAmt,
                          "ValOperand_Shifted");
  }

  // Loop body: apply the masked operation to the currently-loaded wide word.
  auto PerformPartwordOp = [&](IRBuilderBase &Builder, Value *Loaded) {
    return performMaskedAtomicOp(Op, Builder, Loaded, ValOperand_Shifted,
                                 AI->getValOperand(), PMV);
  };

  Value *OldResult;
  if (ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg) {
    OldResult = insertRMWCmpXchgLoop(
        Builder, PMV.WordType, PMV.AlignedAddr, PMV.AlignedAddrAlignment,
        MemOpOrder, SSID, PerformPartwordOp, createCmpXchgInstFun, AI);
  } else {
    assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::LLSC);
    OldResult = insertRMWLLSCLoop(Builder, PMV.WordType, PMV.AlignedAddr,
                                  PMV.AlignedAddrAlignment, MemOpOrder,
                                  PerformPartwordOp);
  }

  // The loop yields the full word; extract the sub-word result for the
  // original instruction's users.
  Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
}
1050
// Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
// No loop is needed: the untouched bits of the wide word are preserved by
// choosing an operand that is the identity for them.
AtomicRMWInst *AtomicExpandImpl::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
  ReplacementIRBuilder Builder(AI, *DL);

          Op == AtomicRMWInst::And) &&
         "Unable to widen operation");

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
                       AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // Zero-extend and position the operand over the field; the bits outside
  // the field are zero after the zext/shl.
  Value *ValOperand_Shifted =
      Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
                        PMV.ShiftAmt, "ValOperand_Shifted");

  Value *NewOperand;

  // For 'and', the bits outside the field must be 1 (ORed with Inv_Mask) so
  // they are preserved; zeros already preserve them for 'or'/'xor'.
  if (Op == AtomicRMWInst::And)
    NewOperand =
        Builder.CreateOr(ValOperand_Shifted, PMV.Inv_Mask, "AndOperand");
  else
    NewOperand = ValOperand_Shifted;

  AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(
      Op, PMV.AlignedAddr, NewOperand, PMV.AlignedAddrAlignment,
      AI->getOrdering(), AI->getSyncScopeID());

  copyMetadataForAtomic(*NewAI, *AI);

  // Users of the original instruction expect the sub-word old value.
  Value *FinalOldResult = extractMaskedValue(Builder, NewAI, PMV);
  AI->replaceAllUsesWith(FinalOldResult);
  AI->eraseFromParent();
  return NewAI;
}
1087
bool AtomicExpandImpl::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
  // The basic idea here is that we're expanding a cmpxchg of a
  // smaller memory size up to a word-sized cmpxchg. To do this, we
  // need to add a retry-loop for strong cmpxchg, so that
  // modifications to other parts of the word don't cause a spurious
  // failure.

  // This generates code like the following:
  //     [[Setup mask values PMV.*]]
  //     %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
  //     %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
  //     %InitLoaded = load i32* %addr
  //     %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
  //     br partword.cmpxchg.loop
  // partword.cmpxchg.loop:
  //     %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
  //        [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
  //     %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
  //     %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
  //     %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
  //        i32 %FullWord_NewVal success_ordering failure_ordering
  //     %OldVal = extractvalue { i32, i1 } %NewCI, 0
  //     %Success = extractvalue { i32, i1 } %NewCI, 1
  //     br i1 %Success, label %partword.cmpxchg.end,
  //        label %partword.cmpxchg.failure
  // partword.cmpxchg.failure:
  //     %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
  //     %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
  //     br i1 %ShouldContinue, label %partword.cmpxchg.loop,
  //        label %partword.cmpxchg.end
  // partword.cmpxchg.end:
  //     %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
  //     %FinalOldVal = trunc i32 %tmp1 to i8
  //     %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
  //     %Res = insertvalue { i8, i1 } %25, i1 %Success, 1

  Value *Addr = CI->getPointerOperand();
  Value *Cmp = CI->getCompareOperand();
  Value *NewVal = CI->getNewValOperand();

  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  ReplacementIRBuilder Builder(CI, *DL);
  LLVMContext &Ctx = Builder.getContext();

  BasicBlock *EndBB =
      BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
  auto FailureBB =
      BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
  auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);

  // The split call above "helpfully" added a branch at the end of BB
  // (to the wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
                       CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // Shift the incoming values over, into the right location in the word.
  Value *NewVal_Shifted =
      Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
  Value *Cmp_Shifted =
      Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);

  // Load the entire current word, and mask into place the expected and new
  // values
  LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
  InitLoaded->setVolatile(CI->isVolatile());
  Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
  Builder.CreateBr(LoopBB);

  // partword.cmpxchg.loop:
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
  Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);

  // Mask/Or the expected and new values into place in the loaded word.
  Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
  Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
  AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
      PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, PMV.AlignedAddrAlignment,
  NewCI->setVolatile(CI->isVolatile());
  // When we're building a strong cmpxchg, we need a loop, so you
  // might think we could use a weak cmpxchg inside. But, using strong
  // allows the below comparison for ShouldContinue, and we're
  // expecting the underlying cmpxchg to be a machine instruction,
  // which is strong anyways.
  NewCI->setWeak(CI->isWeak());

  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
  Value *Success = Builder.CreateExtractValue(NewCI, 1);

  // A weak cmpxchg may fail spuriously, so a single attempt suffices; a
  // strong one falls through to the retry check below on failure.
  if (CI->isWeak())
    Builder.CreateBr(EndBB);
  else
    Builder.CreateCondBr(Success, EndBB, FailureBB);

  // partword.cmpxchg.failure:
  Builder.SetInsertPoint(FailureBB);
  // Upon failure, verify that the masked-out part of the loaded value
  // has been modified. If it didn't, abort the cmpxchg, since the
  // masked-in part must've.
  Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
  Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
  Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);

  // Add the second value to the phi from above
  Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);

  // partword.cmpxchg.end:
  Builder.SetInsertPoint(CI);

  // Extract the sub-word old value and rebuild the { value, success } pair
  // that users of the original cmpxchg expect.
  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
  Value *Res = PoisonValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
  Res = Builder.CreateInsertValue(Res, Success, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
  return true;
}
1212
1213void AtomicExpandImpl::expandAtomicOpToLLSC(
1214 Instruction *I, Type *ResultType, Value *Addr, Align AddrAlign,
1215 AtomicOrdering MemOpOrder,
1216 function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
1217 ReplacementIRBuilder Builder(I, *DL);
1218 Value *Loaded = insertRMWLLSCLoop(Builder, ResultType, Addr, AddrAlign,
1219 MemOpOrder, PerformOp);
1220
1221 I->replaceAllUsesWith(Loaded);
1222 I->eraseFromParent();
1223}
1224
1225void AtomicExpandImpl::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
1226 ReplacementIRBuilder Builder(AI, *DL);
1227
1228 PartwordMaskValues PMV =
1229 createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
1230 AI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
1231
1232 // The value operand must be sign-extended for signed min/max so that the
1233 // target's signed comparison instructions can be used. Otherwise, just
1234 // zero-ext.
1235 Instruction::CastOps CastOp = Instruction::ZExt;
1236 AtomicRMWInst::BinOp RMWOp = AI->getOperation();
1237 if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
1238 CastOp = Instruction::SExt;
1239
1240 Value *ValOperand_Shifted = Builder.CreateShl(
1241 Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
1242 PMV.ShiftAmt, "ValOperand_Shifted");
1243 Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
1244 Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
1245 AI->getOrdering());
1246 Value *FinalOldResult = extractMaskedValue(Builder, OldResult, PMV);
1247 AI->replaceAllUsesWith(FinalOldResult);
1248 AI->eraseFromParent();
1249}
1250
// Lower a part-word cmpxchg to a target-provided masked intrinsic operating
// on the containing aligned word.
void AtomicExpandImpl::expandAtomicCmpXchgToMaskedIntrinsic(
    AtomicCmpXchgInst *CI) {
  ReplacementIRBuilder Builder(CI, *DL);

  PartwordMaskValues PMV = createMaskInstrs(
      Builder, CI, CI->getCompareOperand()->getType(), CI->getPointerOperand(),
      CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);

  // Shift both the expected and new values into the field's bit position
  // within the aligned word.
  Value *CmpVal_Shifted = Builder.CreateShl(
      Builder.CreateZExt(CI->getCompareOperand(), PMV.WordType), PMV.ShiftAmt,
      "CmpVal_Shifted");
  Value *NewVal_Shifted = Builder.CreateShl(
      Builder.CreateZExt(CI->getNewValOperand(), PMV.WordType), PMV.ShiftAmt,
      "NewVal_Shifted");
      Builder, CI, PMV.AlignedAddr, CmpVal_Shifted, NewVal_Shifted, PMV.Mask,
      CI->getMergedOrdering());
  Value *FinalOldVal = extractMaskedValue(Builder, OldVal, PMV);
  Value *Res = PoisonValue::get(CI->getType());
  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
  // Success is determined by comparing only the field bits of the returned
  // old value against the (shifted) expected value.
  Value *Success = Builder.CreateICmpEQ(
      CmpVal_Shifted, Builder.CreateAnd(OldVal, PMV.Mask), "Success");
  Res = Builder.CreateInsertValue(Res, Success, 1);

  CI->replaceAllUsesWith(Res);
  CI->eraseFromParent();
}
1278
// Emit a load-linked / store-conditional retry loop at the current insert
// point. Returns the loaded (old) value; the builder is left positioned at
// the start of the exit block.
Value *AtomicExpandImpl::insertRMWLLSCLoop(
    IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  assert(AddrAlign >= F->getDataLayout().getTypeStoreSize(ResultTy) &&
         "Expected at least natural alignment at this point.");

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  // atomicrmw.start:
  //     %loaded = @load.linked(%addr)
  //     %new = some_op iN %loaded, %incr
  //     %stored = @store_conditional(%new, %addr)
  //     %try_again = icmp i32 ne %stored, 0
  //     br i1 %try_again, label %loop, label %atomicrmw.end
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place).
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  Value *Loaded = TLI->emitLoadLinked(Builder, ResultTy, Addr, MemOpOrder);

  // Compute the updated value via the caller-supplied callback.
  Value *NewVal = PerformOp(Builder, Loaded);

  // A nonzero status from the conditional store means it failed and the loop
  // must retry.
  Value *StoreSuccess =
      TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
  Value *TryAgain = Builder.CreateICmpNE(
      StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");

  Instruction *CondBr = Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);

  // Atomic RMW expands to a Load-linked / Store-Conditional loop, because it is
  // hard to predict precise branch weights we mark the branch as "unknown"
  // (50/50) to prevent misleading optimizations.

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return Loaded;
}
1333
1334/// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
1335/// the equivalent bitwidth. We used to not support pointer cmpxchg in the
1336/// IR. As a migration step, we convert back to what use to be the standard
1337/// way to represent a pointer cmpxchg so that we can update backends one by
1338/// one.
1339AtomicCmpXchgInst *
1340AtomicExpandImpl::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
1341 auto *M = CI->getModule();
1342 Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
1343 M->getDataLayout());
1344
1345 ReplacementIRBuilder Builder(CI, *DL);
1346
1347 Value *Addr = CI->getPointerOperand();
1348
1349 Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
1350 Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
1351
1352 auto *NewCI = Builder.CreateAtomicCmpXchg(
1353 Addr, NewCmp, NewNewVal, CI->getAlign(), CI->getSuccessOrdering(),
1354 CI->getFailureOrdering(), CI->getSyncScopeID());
1355 NewCI->setVolatile(CI->isVolatile());
1356 NewCI->setWeak(CI->isWeak());
1357 LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1358
1359 Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1360 Value *Succ = Builder.CreateExtractValue(NewCI, 1);
1361
1362 OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
1363
1364 Value *Res = PoisonValue::get(CI->getType());
1365 Res = Builder.CreateInsertValue(Res, OldVal, 0);
1366 Res = Builder.CreateInsertValue(Res, Succ, 1);
1367
1368 CI->replaceAllUsesWith(Res);
1369 CI->eraseFromParent();
1370 return NewCI;
1371}
1372
// Expand a cmpxchg into an explicit LL/SC control-flow diamond, inserting
// target fences where required. Always replaces and erases CI; returns true.
bool AtomicExpandImpl::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
  AtomicOrdering FailureOrder = CI->getFailureOrdering();
  Value *Addr = CI->getPointerOperand();
  BasicBlock *BB = CI->getParent();
  Function *F = BB->getParent();
  LLVMContext &Ctx = F->getContext();
  // If shouldInsertFencesForAtomic() returns true, then the target does not
  // want to deal with memory orders, and emitLeading/TrailingFence should take
  // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
  // should preserve the ordering.
  bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
  AtomicOrdering MemOpOrder = ShouldInsertFencesForAtomic
                                  ? AtomicOrdering::Monotonic
                                  : CI->getMergedOrdering();

  // In implementations which use a barrier to achieve release semantics, we can
  // delay emitting this barrier until we know a store is actually going to be
  // attempted. The cost of this delay is that we need 2 copies of the block
  // emitting the load-linked, affecting code size.
  //
  // Ideally, this logic would be unconditional except for the minsize check
  // since in other cases the extra blocks naturally collapse down to the
  // minimal loop. Unfortunately, this puts too much stress on later
  // optimisations so we avoid emitting the extra logic in those cases too.
  bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
                           SuccessOrder != AtomicOrdering::Monotonic &&
                           SuccessOrder != AtomicOrdering::Acquire &&
                           !F->hasMinSize();

  // There's no overhead for sinking the release barrier in a weak cmpxchg, so
  // do it even on minsize.
  bool UseUnconditionalReleaseBarrier = F->hasMinSize() && !CI->isWeak();

  // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
  //
  // The full expansion we produce is:
  //     [...]
  //     %aligned.addr = ...
  // cmpxchg.start:
  //     %unreleasedload = @load.linked(%aligned.addr)
  //     %unreleasedload.extract = extract value from %unreleasedload
  //     %should_store = icmp eq %unreleasedload.extract, %desired
  //     br i1 %should_store, label %cmpxchg.releasingstore,
  //                          label %cmpxchg.nostore
  // cmpxchg.releasingstore:
  //     fence?
  //     br label cmpxchg.trystore
  // cmpxchg.trystore:
  //     %loaded.trystore = phi [%unreleasedload, %cmpxchg.releasingstore],
  //                            [%releasedload, %cmpxchg.releasedload]
  //     %updated.new = insert %new into %loaded.trystore
  //     %stored = @store_conditional(%updated.new, %aligned.addr)
  //     %success = icmp eq i32 %stored, 0
  //     br i1 %success, label %cmpxchg.success,
  //                     label %cmpxchg.releasedload/%cmpxchg.failure
  // cmpxchg.releasedload:
  //     %releasedload = @load.linked(%aligned.addr)
  //     %releasedload.extract = extract value from %releasedload
  //     %should_store = icmp eq %releasedload.extract, %desired
  //     br i1 %should_store, label %cmpxchg.trystore,
  //                          label %cmpxchg.failure
  // cmpxchg.success:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.nostore:
  //     %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
  //                           [%releasedload,
  //                               %cmpxchg.releasedload/%cmpxchg.trystore]
  //     @load_linked_fail_balance()?
  //     br label %cmpxchg.failure
  // cmpxchg.failure:
  //     fence?
  //     br label %cmpxchg.end
  // cmpxchg.end:
  //     %loaded.exit = phi [%loaded.nostore, %cmpxchg.failure],
  //                        [%loaded.trystore, %cmpxchg.trystore]
  //     %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
  //     %loaded = extract value from %loaded.exit
  //     %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
  //     %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
  //     [...]
  BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
  auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
  auto ReleasedLoadBB =
      BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
  auto TryStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
  auto ReleasingStoreBB =
      BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
  auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);

  ReplacementIRBuilder Builder(CI, *DL);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we might want a fence too. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);

  PartwordMaskValues PMV =
      createMaskInstrs(Builder, CI, CI->getCompareOperand()->getType(), Addr,
                       CI->getAlign(), TLI->getMinCmpXchgSizeInBits() / 8);
  Builder.CreateBr(StartBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(StartBB);
  Value *UnreleasedLoad =
      TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
  Value *UnreleasedLoadExtract =
      extractMaskedValue(Builder, UnreleasedLoad, PMV);
  Value *ShouldStore = Builder.CreateICmpEQ(
      UnreleasedLoadExtract, CI->getCompareOperand(), "should_store");

  // If the cmpxchg doesn't actually need any ordering when it fails, we can
  // jump straight past that fence instruction (if it exists).
  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB,
                       MDBuilder(F->getContext()).createLikelyBranchWeights());

  Builder.SetInsertPoint(ReleasingStoreBB);
  if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
    TLI->emitLeadingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(TryStoreBB);

  Builder.SetInsertPoint(TryStoreBB);
  PHINode *LoadedTryStore =
      Builder.CreatePHI(PMV.WordType, 2, "loaded.trystore");
  LoadedTryStore->addIncoming(UnreleasedLoad, ReleasingStoreBB);
  Value *NewValueInsert =
      insertMaskedValue(Builder, LoadedTryStore, CI->getNewValOperand(), PMV);
  Value *StoreSuccess = TLI->emitStoreConditional(Builder, NewValueInsert,
                                                  PMV.AlignedAddr, MemOpOrder);
  StoreSuccess = Builder.CreateICmpEQ(
      StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
  // A weak cmpxchg is allowed to fail, so a failed store-conditional goes
  // straight to the failure block instead of retrying.
  BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
  Builder.CreateCondBr(StoreSuccess, SuccessBB,
                       CI->isWeak() ? FailureBB : RetryBB,
                       MDBuilder(F->getContext()).createLikelyBranchWeights());

  Builder.SetInsertPoint(ReleasedLoadBB);
  Value *SecondLoad;
  if (HasReleasedLoadBB) {
    SecondLoad =
        TLI->emitLoadLinked(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder);
    Value *SecondLoadExtract = extractMaskedValue(Builder, SecondLoad, PMV);
    ShouldStore = Builder.CreateICmpEQ(SecondLoadExtract,
                                       CI->getCompareOperand(), "should_store");

    // If the cmpxchg doesn't actually need any ordering when it fails, we can
    // jump straight past that fence instruction (if it exists).
    Builder.CreateCondBr(
        ShouldStore, TryStoreBB, NoStoreBB,
        MDBuilder(F->getContext()).createLikelyBranchWeights());
    // Update PHI node in TryStoreBB.
    LoadedTryStore->addIncoming(SecondLoad, ReleasedLoadBB);
  } else
    Builder.CreateUnreachable();

  // Make sure later instructions don't get reordered with a fence if
  // necessary.
  Builder.SetInsertPoint(SuccessBB);
  if (ShouldInsertFencesForAtomic ||
    TLI->emitTrailingFence(Builder, CI, SuccessOrder);
  Builder.CreateBr(ExitBB);

  Builder.SetInsertPoint(NoStoreBB);
  PHINode *LoadedNoStore =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.nostore");
  LoadedNoStore->addIncoming(UnreleasedLoad, StartBB);
  if (HasReleasedLoadBB)
    LoadedNoStore->addIncoming(SecondLoad, ReleasedLoadBB);

  // In the failing case, where we don't execute the store-conditional, the
  // target might want to balance out the load-linked with a dedicated
  // instruction (e.g., on ARM, clearing the exclusive monitor).
  Builder.CreateBr(FailureBB);

  Builder.SetInsertPoint(FailureBB);
  PHINode *LoadedFailure =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.failure");
  LoadedFailure->addIncoming(LoadedNoStore, NoStoreBB);
  if (CI->isWeak())
    LoadedFailure->addIncoming(LoadedTryStore, TryStoreBB);
  if (ShouldInsertFencesForAtomic)
    TLI->emitTrailingFence(Builder, CI, FailureOrder);
  Builder.CreateBr(ExitBB);

  // Finally, we have control-flow based knowledge of whether the cmpxchg
  // succeeded or not. We expose this to later passes by converting any
  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
  // PHI.
  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  PHINode *LoadedExit =
      Builder.CreatePHI(UnreleasedLoad->getType(), 2, "loaded.exit");
  LoadedExit->addIncoming(LoadedTryStore, SuccessBB);
  LoadedExit->addIncoming(LoadedFailure, FailureBB);
  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2, "success");
  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);

  // This is the "exit value" from the cmpxchg expansion. It may be of
  // a type wider than the one in the cmpxchg instruction.
  Value *LoadedFull = LoadedExit;

  Builder.SetInsertPoint(ExitBB, std::next(Success->getIterator()));
  Value *Loaded = extractMaskedValue(Builder, LoadedFull, PMV);

  // Look for any users of the cmpxchg that are just comparing the loaded value
  // against the desired one, and replace them with the CFG-derived version.
  for (auto *User : CI->users()) {
    ExtractValueInst *EV = dyn_cast<ExtractValueInst>(User);
    if (!EV)
      continue;

    assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
           "weird extraction from { iN, i1 }");

    if (EV->getIndices()[0] == 0)
      EV->replaceAllUsesWith(Loaded);
    else

    PrunedInsts.push_back(EV);
  }

  // We can remove the instructions now we're no longer iterating through them.
  for (auto *EV : PrunedInsts)
    EV->eraseFromParent();

  if (!CI->use_empty()) {
    // Some use of the full struct return that we don't understand has happened,
    // so we've got to reconstruct it properly.
    Value *Res;
    Res = Builder.CreateInsertValue(PoisonValue::get(CI->getType()), Loaded, 0);
    Res = Builder.CreateInsertValue(Res, Success, 1);

    CI->replaceAllUsesWith(Res);
  }

  CI->eraseFromParent();
  return true;
}
1622
// Return true if applying this RMW's constant operand can never change the
// stored value, i.e. the operand is the operation's identity element.
bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
  // TODO: Add floating point support.
  auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
  if (!C)
    return false;

  switch (RMWI->getOperation()) {
  case AtomicRMWInst::Add:
  case AtomicRMWInst::Sub:
  case AtomicRMWInst::Or:
  case AtomicRMWInst::Xor:
    // x +/- 0, x | 0, x ^ 0 == x.
    return C->isZero();
  case AtomicRMWInst::And:
    // x & ~0 == x.
    return C->isMinusOne();
  case AtomicRMWInst::Min:
    // min(x, INT_MAX) == x.
    return C->isMaxValue(true);
  case AtomicRMWInst::Max:
    // max(x, INT_MIN) == x.
    return C->isMinValue(true);
    return C->isMaxValue(false);
    return C->isMinValue(false);
  default:
    return false;
  }
}
1649
1650bool AtomicExpandImpl::simplifyIdempotentRMW(AtomicRMWInst *RMWI) {
1651 if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1652 tryExpandAtomicLoad(ResultingLoad);
1653 return true;
1654 }
1655 return false;
1656}
1657
// Emit a cmpxchg-based retry loop at the current insert point, applying
// PerformOp to the loaded value on each iteration. Returns the final old
// value; the builder is left positioned at the start of the exit block.
Value *AtomicExpandImpl::insertRMWCmpXchgLoop(
    IRBuilderBase &Builder, Type *ResultTy, Value *Addr, Align AddrAlign,
    AtomicOrdering MemOpOrder, SyncScope::ID SSID,
    function_ref<Value *(IRBuilderBase &, Value *)> PerformOp,
    CreateCmpXchgInstFun CreateCmpXchg, Instruction *MetadataSrc) {
  LLVMContext &Ctx = Builder.getContext();
  BasicBlock *BB = Builder.GetInsertBlock();
  Function *F = BB->getParent();

  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
  //
  // The standard expansion we produce is:
  //     [...]
  //     %init_loaded = load atomic iN* %addr
  //     br label %loop
  // loop:
  //     %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
  //     %new = some_op iN %loaded, %incr
  //     %pair = cmpxchg iN* %addr, iN %loaded, iN %new
  //     %new_loaded = extractvalue { iN, i1 } %pair, 0
  //     %success = extractvalue { iN, i1 } %pair, 1
  //     br i1 %success, label %atomicrmw.end, label %loop
  // atomicrmw.end:
  //     [...]
  BasicBlock *ExitBB =
      BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);

  // The split call above "helpfully" added a branch at the end of BB (to the
  // wrong place), but we want a load. It's easiest to just remove
  // the branch entirely.
  std::prev(BB->end())->eraseFromParent();
  Builder.SetInsertPoint(BB);
  LoadInst *InitLoaded = Builder.CreateAlignedLoad(ResultTy, Addr, AddrAlign);
  // TODO: The initial load must be atomic with the same synchronization scope
  // to avoid a data race with concurrent stores. If the instruction being
  // emulated is volatile, issue a volatile load.
  Builder.CreateBr(LoopBB);

  // Start the main loop block now that we've taken care of the preliminaries.
  Builder.SetInsertPoint(LoopBB);
  PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
  Loaded->addIncoming(InitLoaded, BB);

  Value *NewVal = PerformOp(Builder, Loaded);

  Value *NewLoaded = nullptr;
  Value *Success = nullptr;

  // cmpxchg requires at least monotonic ordering, so weaken Unordered.
  CreateCmpXchg(Builder, Addr, Loaded, NewVal, AddrAlign,
                MemOpOrder == AtomicOrdering::Unordered
                    ? AtomicOrdering::Monotonic
                    : MemOpOrder,
                SSID, Success, NewLoaded, MetadataSrc);
  assert(Success && NewLoaded);

  Loaded->addIncoming(NewLoaded, LoopBB);

  Instruction *CondBr = Builder.CreateCondBr(Success, ExitBB, LoopBB);

  // Atomic RMW expands to a cmpxchg loop, Since precise branch weights
  // cannot be easily determined here, we mark the branch as "unknown" (50/50)
  // to prevent misleading optimizations.

  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
  return NewLoaded;
}
1726
1727bool AtomicExpandImpl::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1728 unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1729 unsigned ValueSize = getAtomicOpSize(CI);
1730
1731 switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1732 default:
1733 llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
1734 case TargetLoweringBase::AtomicExpansionKind::None:
1735 if (ValueSize < MinCASSize)
1736 return expandPartwordCmpXchg(CI);
1737 return false;
1738 case TargetLoweringBase::AtomicExpansionKind::LLSC: {
1739 return expandAtomicCmpXchg(CI);
1740 }
1741 case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
1742 expandAtomicCmpXchgToMaskedIntrinsic(CI);
1743 return true;
1744 case TargetLoweringBase::AtomicExpansionKind::NotAtomic:
1745 return lowerAtomicCmpXchgInst(CI);
1746 case TargetLoweringBase::AtomicExpansionKind::CustomExpand: {
1747 TLI->emitExpandAtomicCmpXchg(CI);
1748 return true;
1749 }
1750 }
1751}
1752
1753bool AtomicExpandImpl::expandAtomicRMWToCmpXchg(
1754 AtomicRMWInst *AI, CreateCmpXchgInstFun CreateCmpXchg) {
1755 ReplacementIRBuilder Builder(AI, AI->getDataLayout());
1756 Builder.setIsFPConstrained(
1757 AI->getFunction()->hasFnAttribute(Attribute::StrictFP));
1758
1759 // FIXME: If FP exceptions are observable, we should force them off for the
1760 // loop for the FP atomics.
1761 Value *Loaded = AtomicExpandImpl::insertRMWCmpXchgLoop(
1762 Builder, AI->getType(), AI->getPointerOperand(), AI->getAlign(),
1763 AI->getOrdering(), AI->getSyncScopeID(),
1764 [&](IRBuilderBase &Builder, Value *Loaded) {
1765 return buildAtomicRMWValue(AI->getOperation(), Builder, Loaded,
1766 AI->getValOperand());
1767 },
1768 CreateCmpXchg, /*MetadataSrc=*/AI);
1769
1770 AI->replaceAllUsesWith(Loaded);
1771 AI->eraseFromParent();
1772 return true;
1773}
1774
1775// In order to use one of the sized library calls such as
1776// __atomic_fetch_add_4, the alignment must be sufficient, the size
1777// must be one of the potentially-specialized sizes, and the value
1778// type must actually exist in C on the target (otherwise, the
1779// function wouldn't actually be defined.)
1780static bool canUseSizedAtomicCall(unsigned Size, Align Alignment,
1781 const DataLayout &DL) {
1782 // TODO: "LargestSize" is an approximation for "largest type that
1783 // you can express in C". It seems to be the case that int128 is
1784 // supported on all 64-bit platforms, otherwise only up to 64-bit
1785 // integers are supported. If we get this wrong, then we'll try to
1786 // call a sized libcall that doesn't actually exist. There should
1787 // really be some more reliable way in LLVM of determining integer
1788 // sizes which are valid in the target's C ABI...
1789 unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1790 return Alignment >= Size &&
1791 (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1792 Size <= LargestSize;
1793}
1794
1795void AtomicExpandImpl::expandAtomicLoadToLibcall(LoadInst *I) {
1796 static const RTLIB::Libcall Libcalls[6] = {
1797 RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1798 RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1799 unsigned Size = getAtomicOpSize(I);
1800
1801 bool expanded = expandAtomicOpToLibcall(
1802 I, Size, I->getAlign(), I->getPointerOperand(), nullptr, nullptr,
1803 I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1804 if (!expanded)
1805 handleFailure(*I, "unsupported atomic load");
1806}
1807
1808void AtomicExpandImpl::expandAtomicStoreToLibcall(StoreInst *I) {
1809 static const RTLIB::Libcall Libcalls[6] = {
1810 RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1811 RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1812 unsigned Size = getAtomicOpSize(I);
1813
1814 bool expanded = expandAtomicOpToLibcall(
1815 I, Size, I->getAlign(), I->getPointerOperand(), I->getValueOperand(),
1816 nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1817 if (!expanded)
1818 handleFailure(*I, "unsupported atomic store");
1819}
1820
1821void AtomicExpandImpl::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
1822 static const RTLIB::Libcall Libcalls[6] = {
1823 RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1824 RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1825 RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1826 unsigned Size = getAtomicOpSize(I);
1827
1828 bool expanded = expandAtomicOpToLibcall(
1829 I, Size, I->getAlign(), I->getPointerOperand(), I->getNewValOperand(),
1830 I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
1831 Libcalls);
1832 if (!expanded)
1833 handleFailure(*I, "unsupported cmpxchg");
1834}
1835
  // One table per RMW operation, indexed by access size: slot 0 is the
  // generic (unsized) libcall, slots 1..5 are the specialized variants
  // for 1, 2, 4, 8, and 16 byte accesses.
  static const RTLIB::Libcall LibcallsXchg[6] = {
      RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
      RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
      RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
  // The fetch_* families have no generic entry point, hence
  // UNKNOWN_LIBCALL in slot 0; callers fall back to a CAS loop then.
  static const RTLIB::Libcall LibcallsAdd[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
      RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
      RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
  static const RTLIB::Libcall LibcallsSub[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
      RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
      RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
  static const RTLIB::Libcall LibcallsAnd[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
      RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
      RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
  static const RTLIB::Libcall LibcallsOr[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
      RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
      RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
  static const RTLIB::Libcall LibcallsXor[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
      RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
      RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
  static const RTLIB::Libcall LibcallsNand[6] = {
      RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
      RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
      RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};

  // NOTE(review): several case labels of this switch (BAD_BINOP, Xchg,
  // Nand, and the min/max/FP family after Min) appear to have been lost
  // in extraction, as was the function's signature line — verify against
  // the upstream file before relying on this listing.
  switch (Op) {
    llvm_unreachable("Should not have BAD_BINOP.");
    return ArrayRef(LibcallsXchg);
  case AtomicRMWInst::Add:
    return ArrayRef(LibcallsAdd);
  case AtomicRMWInst::Sub:
    return ArrayRef(LibcallsSub);
  case AtomicRMWInst::And:
    return ArrayRef(LibcallsAnd);
  case AtomicRMWInst::Or:
    return ArrayRef(LibcallsOr);
  case AtomicRMWInst::Xor:
    return ArrayRef(LibcallsXor);
    return ArrayRef(LibcallsNand);
  case AtomicRMWInst::Max:
  case AtomicRMWInst::Min:
    // No atomic libcalls are available for these.
    return {};
  }
  llvm_unreachable("Unexpected AtomicRMW operation.");
}
1904
// Expand an atomicrmw into a call to the __atomic_* support library.
// First tries a direct fetch_op/exchange libcall for the operation; when
// none applies, lowers through a cmpxchg loop whose cmpxchg is itself
// expanded to a __atomic_compare_exchange libcall.
void AtomicExpandImpl::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
  ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());

  unsigned Size = getAtomicOpSize(I);

  bool Success = false;
  // An empty table means no libcall family exists for this operation.
  if (!Libcalls.empty())
    Success = expandAtomicOpToLibcall(
        I, Size, I->getAlign(), I->getPointerOperand(), I->getValOperand(),
        nullptr, I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);

  // The expansion failed: either there were no libcalls at all for
  // the operation (min/max), or there were only size-specialized
  // libcalls (add/sub/etc) and we needed a generic. So, expand to a
  // CAS libcall, via a CAS loop, instead.
  if (!Success) {
    expandAtomicRMWToCmpXchg(
        I, [this](IRBuilderBase &Builder, Value *Addr, Value *Loaded,
                  Value *NewVal, Align Alignment, AtomicOrdering MemOpOrder,
                  SyncScope::ID SSID, Value *&Success, Value *&NewLoaded,
                  Instruction *MetadataSrc) {
          // Create the CAS instruction normally...
          // NOTE(review): the trailing arguments of this call (failure
          // ordering / sync scope) appear to have been lost in
          // extraction — verify against the upstream file.
          AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
              Addr, Loaded, NewVal, Alignment, MemOpOrder,
          if (MetadataSrc)
            copyMetadataForAtomic(*Pair, *MetadataSrc);

          Success = Builder.CreateExtractValue(Pair, 1, "success");
          NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");

          // ...and then expand the CAS into a libcall.
          expandAtomicCASToLibcall(Pair);
        });
  }
}
1941
// A helper routine for the above expandAtomic*ToLibcall functions.
//
// 'Libcalls' contains an array of enum values for the particular
// ATOMIC libcalls to be emitted. All of the other arguments besides
// 'I' are extracted from the Instruction subclass by the
// caller. Depending on the particular call, some will be null.
//
// Returns true when the instruction was replaced by a libcall; false when
// no suitable libcall exists for this size/operation combination (nothing
// is emitted in that case and the caller must try another expansion).
//
// NOTE(review): a few statements in this listing (the declarations of the
// 'Args'/'ArgTys' vectors, the libcall-name argument of
// getOrInsertFunction, and the replaceAllUsesWith calls near the end)
// appear to have been dropped by the doc extraction — verify against the
// upstream file.
bool AtomicExpandImpl::expandAtomicOpToLibcall(
    Instruction *I, unsigned Size, Align Alignment, Value *PointerOperand,
    Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
    AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
  // Slot 0 is the generic libcall; slots 1..5 the 1/2/4/8/16-byte variants.
  assert(Libcalls.size() == 6);

  LLVMContext &Ctx = I->getContext();
  Module *M = I->getModule();
  const DataLayout &DL = M->getDataLayout();
  IRBuilder<> Builder(I);
  // Allocas used to pass values indirectly go in the entry block.
  IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());

  bool UseSizedLibcall = canUseSizedAtomicCall(Size, Alignment, DL);
  Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);

  if (M->getTargetTriple().isOSWindows() && M->getTargetTriple().isX86_64() &&
      Size == 16) {
    // x86_64 Windows passes i128 as an XMM vector; on return, it is in
    // XMM0, and as a parameter, it is passed indirectly. The generic lowering
    // rules handles this correctly if we pass it as a v2i64 rather than
    // i128. This is what Clang does in the frontend for such types as well
    // (see WinX86_64ABIInfo::classify in Clang).
    SizedIntTy = FixedVectorType::get(Type::getInt64Ty(Ctx), 2);
  }

  const Align AllocaAlignment = DL.getPrefTypeAlign(SizedIntTy);

  // TODO: the "order" argument type is "int", not int32. So
  // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
  assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
  Constant *OrderingVal =
      ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
  Constant *Ordering2Val = nullptr;
  if (CASExpected) {
    // cmpxchg carries a second (failure) ordering.
    assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
    Ordering2Val =
        ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
  }
  bool HasResult = I->getType() != Type::getVoidTy(Ctx);

  // Pick the libcall: the sized variant when usable, else the generic one.
  RTLIB::Libcall RTLibType;
  if (UseSizedLibcall) {
    switch (Size) {
    case 1:
      RTLibType = Libcalls[1];
      break;
    case 2:
      RTLibType = Libcalls[2];
      break;
    case 4:
      RTLibType = Libcalls[3];
      break;
    case 8:
      RTLibType = Libcalls[4];
      break;
    case 16:
      RTLibType = Libcalls[5];
      break;
    }
  } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
    RTLibType = Libcalls[0];
  } else {
    // Can't use sized function, and there's no generic for this
    // operation, so give up.
    return false;
  }

  RTLIB::LibcallImpl LibcallImpl = LibcallLowering->getLibcallImpl(RTLibType);
  if (LibcallImpl == RTLIB::Unsupported) {
    // This target does not implement the requested atomic libcall so give up.
    return false;
  }

  // Build up the function call. There's two kinds. First, the sized
  // variants. These calls are going to be one of the following (with
  // N=1,2,4,8,16):
  //  iN    __atomic_load_N(iN *ptr, int ordering)
  //  void  __atomic_store_N(iN *ptr, iN val, int ordering)
  //  iN    __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
  //  bool  __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
  //                                    int success_order, int failure_order)
  //
  // Note that these functions can be used for non-integer atomic
  // operations, the values just need to be bitcast to integers on the
  // way in and out.
  //
  // And, then, the generic variants. They look like the following:
  //  void  __atomic_load(size_t size, void *ptr, void *ret, int ordering)
  //  void  __atomic_store(size_t size, void *ptr, void *val, int ordering)
  //  void  __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
  //                          int ordering)
  //  bool  __atomic_compare_exchange(size_t size, void *ptr, void *expected,
  //                                  void *desired, int success_order,
  //                                  int failure_order)
  //
  // The different signatures are built up depending on the
  // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
  // variables.

  AllocaInst *AllocaCASExpected = nullptr;
  AllocaInst *AllocaValue = nullptr;
  AllocaInst *AllocaResult = nullptr;

  Type *ResultTy;
  AttributeList Attr;

  // 'size' argument.
  if (!UseSizedLibcall) {
    // Note, getIntPtrType is assumed equivalent to size_t.
    Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
  }

  // 'ptr' argument.
  // note: This assumes all address spaces share a common libfunc
  // implementation and that addresses are convertable.  For systems without
  // that property, we'd need to extend this mechanism to support AS-specific
  // families of atomic intrinsics.
  Value *PtrVal = PointerOperand;
  PtrVal = Builder.CreateAddrSpaceCast(PtrVal, PointerType::getUnqual(Ctx));
  Args.push_back(PtrVal);

  // 'expected' argument, if present.
  if (CASExpected) {
    // Passed indirectly: spill to an entry-block alloca.
    AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
    AllocaCASExpected->setAlignment(AllocaAlignment);
    Builder.CreateLifetimeStart(AllocaCASExpected);
    Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
    Args.push_back(AllocaCASExpected);
  }

  // 'val' argument ('desired' for cas), if present.
  if (ValueOperand) {
    if (UseSizedLibcall) {
      // Sized calls take the value directly, bitcast to an iN.
      Value *IntValue =
          Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
      Args.push_back(IntValue);
    } else {
      // Generic calls take the value indirectly through an alloca.
      AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
      AllocaValue->setAlignment(AllocaAlignment);
      Builder.CreateLifetimeStart(AllocaValue);
      Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
      Args.push_back(AllocaValue);
    }
  }

  // 'ret' argument.
  if (!CASExpected && HasResult && !UseSizedLibcall) {
    AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
    AllocaResult->setAlignment(AllocaAlignment);
    Builder.CreateLifetimeStart(AllocaResult);
    Args.push_back(AllocaResult);
  }

  // 'ordering' ('success_order' for cas) argument.
  Args.push_back(OrderingVal);

  // 'failure_order' argument, if present.
  if (Ordering2Val)
    Args.push_back(Ordering2Val);

  // Now, the return type.
  if (CASExpected) {
    ResultTy = Type::getInt1Ty(Ctx);
    Attr = Attr.addRetAttribute(Ctx, Attribute::ZExt);
  } else if (HasResult && UseSizedLibcall)
    ResultTy = SizedIntTy;
  else
    ResultTy = Type::getVoidTy(Ctx);

  // Done with setting up arguments and return types, create the call:
  for (Value *Arg : Args)
    ArgTys.push_back(Arg->getType());
  FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
  FunctionCallee LibcallFn = M->getOrInsertFunction(
      Attr);
  CallInst *Call = Builder.CreateCall(LibcallFn, Args);
  Call->setAttributes(Attr);
  Value *Result = Call;

  // And then, extract the results...
  if (ValueOperand && !UseSizedLibcall)
    Builder.CreateLifetimeEnd(AllocaValue);

  if (CASExpected) {
    // The final result from the CAS is {load of 'expected' alloca, bool result
    // from call}
    Type *FinalResultTy = I->getType();
    Value *V = PoisonValue::get(FinalResultTy);
    Value *ExpectedOut = Builder.CreateAlignedLoad(
        CASExpected->getType(), AllocaCASExpected, AllocaAlignment);
    Builder.CreateLifetimeEnd(AllocaCASExpected);
    V = Builder.CreateInsertValue(V, ExpectedOut, 0);
    V = Builder.CreateInsertValue(V, Result, 1);
  } else if (HasResult) {
    Value *V;
    if (UseSizedLibcall)
      // Sized calls return the value directly; cast back to the IR type.
      V = Builder.CreateBitOrPointerCast(Result, I->getType());
    else {
      // Generic calls wrote the result through the 'ret' alloca.
      V = Builder.CreateAlignedLoad(I->getType(), AllocaResult,
                                    AllocaAlignment);
      Builder.CreateLifetimeEnd(AllocaResult);
    }
  }
  I->eraseFromParent();
  return true;
}
#define Success
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static Value * performMaskedAtomicOp(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, Value *Loaded, Value *Shifted_Inc, Value *Inc, const PartwordMaskValues &PMV)
Emit IR to implement a masked version of a given atomicrmw operation.
static PartwordMaskValues createMaskInstrs(IRBuilderBase &Builder, Instruction *I, Type *ValueType, Value *Addr, Align AddrAlign, unsigned MinWordSize)
This is a helper function which builds instructions to provide values necessary for partword atomic o...
static bool canUseSizedAtomicCall(unsigned Size, Align Alignment, const DataLayout &DL)
static Value * extractMaskedValue(IRBuilderBase &Builder, Value *WideWord, const PartwordMaskValues &PMV)
static void createCmpXchgInstFun(IRBuilderBase &Builder, Value *Addr, Value *Loaded, Value *NewVal, Align AddrAlign, AtomicOrdering MemOpOrder, SyncScope::ID SSID, Value *&Success, Value *&NewLoaded, Instruction *MetadataSrc)
Expand Atomic static false unsigned getAtomicOpSize(LoadInst *LI)
static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I)
static Value * insertMaskedValue(IRBuilderBase &Builder, Value *WideWord, Value *Updated, const PartwordMaskValues &PMV)
static void copyMetadataForAtomic(Instruction &Dest, const Instruction &Source)
Copy metadata that's safe to preserve when widening atomics.
static ArrayRef< RTLIB::Libcall > GetRMWLibcall(AtomicRMWInst::BinOp Op)
Atomic ordering constants.
This file contains the simple types necessary to represent the attributes associated with functions a...
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static bool runOnFunction(Function &F, bool PostInlining)
#define DEBUG_TYPE
Module.h This file contains the declarations for the Module class.
static bool isIdempotentRMW(AtomicRMWInst &RMWI)
Return true if and only if the given instruction does not modify the memory location referenced.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Machine Check Debug Module
This file provides utility for Memory Model Relaxation Annotations (MMRAs).
#define T
FunctionAnalysisManager FAM
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
This file contains the declarations for profiling metadata utility functions.
This file defines the SmallVector class.
#define LLVM_DEBUG(...)
Definition Debug.h:114
This file describes how to lower LLVM code to machine code.
Target-Independent Code Generator Pass Configuration Options pass.
void setAlignment(Align Align)
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
bool empty() const
empty - Check if the array is empty.
Definition ArrayRef.h:137
An instruction that atomically checks whether a specified value is in a memory location,...
AtomicOrdering getMergedOrdering() const
Returns a single ordering which is at least as strong as both the success and failure orderings for t...
void setWeak(bool IsWeak)
bool isVolatile() const
Return true if this is a cmpxchg from a volatile memory location.
AtomicOrdering getFailureOrdering() const
Returns the failure ordering constraint of this cmpxchg instruction.
static AtomicOrdering getStrongestFailureOrdering(AtomicOrdering SuccessOrdering)
Returns the strongest permitted ordering on failure, given the desired ordering on success.
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
bool isWeak() const
Return true if this cmpxchg may spuriously fail.
void setVolatile(bool V)
Specify whether this is a volatile cmpxchg.
AtomicOrdering getSuccessOrdering() const
Returns the success ordering constraint of this cmpxchg instruction.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this cmpxchg instruction.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
an instruction that atomically reads a memory location, combines it with another value,...
Align getAlign() const
Return the alignment of the memory that is being allocated by the instruction.
bool isVolatile() const
Return true if this is a RMW on a volatile memory location.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ FAdd
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ FMinimum
*p = minimum(old, v) minimum matches the behavior of llvm.minimum.
@ Min
*p = old <signed v ? old : v
@ Sub
*p = old - v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FMaximum
*p = maximum(old, v) maximum matches the behavior of llvm.maximum.
@ FSub
*p = old - v
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ UMin
*p = old <unsigned v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ UMax
*p = old >unsigned v ? old : v
@ FMaximumNum
*p = maximumnum(old, v) maximumnum matches the behavior of llvm.maximumnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
@ FMinimumNum
*p = minimumnum(old, v) minimumnum matches the behavior of llvm.minimumnum.
@ Nand
*p = ~(old & v)
Value * getPointerOperand()
BinOp getOperation() const
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this rmw instruction.
static LLVM_ABI StringRef getOperationName(BinOp Op)
AtomicOrdering getOrdering() const
Returns the ordering constraint of this rmw instruction.
iterator end()
Definition BasicBlock.h:462
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:449
LLVM_ABI BasicBlock * splitBasicBlock(iterator I, const Twine &BBName="")
Split the basic block into two basic blocks at the specified instruction.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
reverse_iterator rbegin()
Definition BasicBlock.h:465
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
InstListType::reverse_iterator reverse_iterator
Definition BasicBlock.h:172
reverse_iterator rend()
Definition BasicBlock.h:467
void setAttributes(AttributeList A)
Set the attributes for this call.
static LLVM_ABI ConstantInt * getTrue(LLVMContext &Context)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:135
static LLVM_ABI ConstantInt * getFalse(LLVMContext &Context)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
ArrayRef< unsigned > getIndices() const
unsigned getNumIndices() const
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:873
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
BasicBlockListType::iterator iterator
Definition Function.h:70
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
AtomicCmpXchgInst * CreateAtomicCmpXchg(Value *Ptr, Value *Cmp, Value *New, MaybeAlign Align, AtomicOrdering SuccessOrdering, AtomicOrdering FailureOrdering, SyncScope::ID SSID=SyncScope::System)
Definition IRBuilder.h:1928
Value * CreateInsertValue(Value *Agg, Value *Val, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2638
LLVM_ABI CallInst * CreateLifetimeStart(Value *Ptr)
Create a lifetime.start intrinsic.
LLVM_ABI CallInst * CreateLifetimeEnd(Value *Ptr)
Create a lifetime.end intrinsic.
LoadInst * CreateAlignedLoad(Type *Ty, Value *Ptr, MaybeAlign Align, const char *Name)
Definition IRBuilder.h:1894
CondBrInst * CreateCondBr(Value *Cond, BasicBlock *True, BasicBlock *False, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a conditional 'br Cond, TrueDest, FalseDest' instruction.
Definition IRBuilder.h:1223
UnreachableInst * CreateUnreachable()
Definition IRBuilder.h:1365
Value * CreateExtractValue(Value *Agg, ArrayRef< unsigned > Idxs, const Twine &Name="")
Definition IRBuilder.h:2631
BasicBlock::iterator GetInsertPoint() const
Definition IRBuilder.h:202
Value * CreateIntToPtr(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2194
Value * CreateCast(Instruction::CastOps Op, Value *V, Type *DestTy, const Twine &Name="", MDNode *FPMathTag=nullptr, FMFSource FMFSource={})
Definition IRBuilder.h:2233
BasicBlock * GetInsertBlock() const
Definition IRBuilder.h:201
Value * CreateICmpNE(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2335
UncondBrInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1217
Value * CreateBitOrPointerCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2281
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition IRBuilder.h:2496
Value * CreateICmpEQ(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:2331
void setIsFPConstrained(bool IsCon)
Enable/Disable use of constrained floating point math.
Definition IRBuilder.h:351
LoadInst * CreateLoad(Type *Ty, Value *Ptr, const char *Name)
Provided to resolve 'CreateLoad(Ty, Ptr, "...")' correctly, instead of converting the string to 'bool...
Definition IRBuilder.h:1877
Value * CreateShl(Value *LHS, Value *RHS, const Twine &Name="", bool HasNUW=false, bool HasNSW=false)
Definition IRBuilder.h:1518
Value * CreateZExt(Value *V, Type *DestTy, const Twine &Name="", bool IsNonNeg=false)
Definition IRBuilder.h:2077
LLVMContext & getContext() const
Definition IRBuilder.h:203
Value * CreateAnd(Value *LHS, Value *RHS, const Twine &Name="")
Definition IRBuilder.h:1577
Value * CreatePtrToInt(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2189
CallInst * CreateCall(FunctionType *FTy, Value *Callee, ArrayRef< Value * > Args={}, const Twine &Name="", MDNode *FPMathTag=nullptr)
Definition IRBuilder.h:2510
AtomicRMWInst * CreateAtomicRMW(AtomicRMWInst::BinOp Op, Value *Ptr, Value *Val, MaybeAlign Align, AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Definition IRBuilder.h:1941
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
StoreInst * CreateAlignedStore(Value *Val, Value *Ptr, MaybeAlign Align, bool isVolatile=false)
Definition IRBuilder.h:1913
Value * CreateOr(Value *LHS, Value *RHS, const Twine &Name="", bool IsDisjoint=false)
Definition IRBuilder.h:1599
Value * CreateAddrSpaceCast(Value *V, Type *DestTy, const Twine &Name="")
Definition IRBuilder.h:2204
Provides an 'InsertHelper' that calls a user-provided callback after performing the default insertion...
Definition IRBuilder.h:75
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2811
InstSimplifyFolder - Use InstructionSimplify to fold operations to existing values.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI void moveAfter(Instruction *MovePos)
Unlink this instruction from its current basic block and insert it into the basic block that MovePos ...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:354
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void emitError(const Instruction *I, const Twine &ErrorStr)
emitError - Emit an error message to the currently installed error handler with optional location inf...
LLVM_ABI void getSyncScopeNames(SmallVectorImpl< StringRef > &SSNs) const
getSyncScopeNames - Populates client supplied SmallVector with synchronization scope names registered...
Tracks which library functions to use for a particular subtarget.
LLVM_ABI RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Return the lowering's selection of implementation call for Call.
Record a mapping from subtarget to LibcallLoweringInfo.
const LibcallLoweringInfo & getLibcallLowering(const TargetSubtargetInfo &Subtarget) const
An instruction for reading from memory.
Value * getPointerOperand()
bool isVolatile() const
Return true if this is a load from a volatile memory location.
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this load instruction.
AtomicOrdering getOrdering() const
Returns the ordering constraint of this load instruction.
void setVolatile(bool V)
Specify whether this is a volatile load or not.
SyncScope::ID getSyncScopeID() const
Returns the synchronization scope ID of this load instruction.
Align getAlign() const
Return the alignment of the access that is being performed.
Metadata node.
Definition Metadata.h:1080
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
LLVMContext & getContext() const
Get the global data context.
Definition Module.h:285
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overriden by passes that need analysis information to do t...
Definition Pass.cpp:112
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses none()
Convenience factory function for the empty preserved set.
Definition Analysis.h:115
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
void setVolatile(bool V)
Specify whether this is a volatile store or not.
void setAlignment(Align Align)
void setAtomic(AtomicOrdering Ordering, SyncScope::ID SSID=SyncScope::System)
Sets the ordering constraint and the synchronization scope ID of this store instruction.
virtual Value * emitStoreConditional(IRBuilderBase &Builder, Value *Val, Value *Addr, AtomicOrdering Ord) const
Perform a store-conditional operation to Addr.
EVT getMemValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
virtual void emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const
Perform a bit test atomicrmw using a target-specific intrinsic.
virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
virtual bool shouldInsertFencesForAtomic(const Instruction *I) const
Whether AtomicExpandPass should automatically insert fences and reduce ordering for this atomic.
virtual AtomicOrdering atomicOperationOrderAfterFenceSplit(const Instruction *I) const
virtual void emitExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) const
Perform a cmpxchg expansion using a target-specific method.
unsigned getMinCmpXchgSizeInBits() const
Returns the size of the smallest cmpxchg or ll/sc instruction the backend supports.
virtual Value * emitMaskedAtomicRMWIntrinsic(IRBuilderBase &Builder, AtomicRMWInst *AI, Value *AlignedAddr, Value *Incr, Value *Mask, Value *ShiftAmt, AtomicOrdering Ord) const
Perform a masked atomicrmw using a target-specific intrinsic.
virtual AtomicExpansionKind shouldExpandAtomicCmpXchgInIR(const AtomicCmpXchgInst *AI) const
Returns how the given atomic cmpxchg should be expanded by the IR-level AtomicExpand pass.
virtual Value * emitLoadLinked(IRBuilderBase &Builder, Type *ValueTy, Value *Addr, AtomicOrdering Ord) const
Perform a load-linked operation on Addr, returning a "Value *" with the corresponding pointee type.
virtual void emitExpandAtomicRMW(AtomicRMWInst *AI) const
Perform an atomicrmw expansion in a target-specific way.
virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilderBase &Builder) const
virtual void emitExpandAtomicStore(StoreInst *SI) const
Perform an atomic store in a target-specific way.
virtual AtomicExpansionKind shouldCastAtomicRMWIInIR(AtomicRMWInst *RMWI) const
Returns how the given atomicrmw should be cast by the IR-level AtomicExpand pass.
virtual bool shouldInsertTrailingSeqCstFenceForAtomicStore(const Instruction *I) const
Whether AtomicExpandPass should automatically insert a seq_cst trailing fence without reducing the or...
virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const
Returns how the given (atomic) load should be expanded by the IR-level AtomicExpand pass.
virtual Value * emitMaskedAtomicCmpXchgIntrinsic(IRBuilderBase &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const
Perform a masked cmpxchg using a target-specific intrinsic.
unsigned getMaxAtomicSizeInBitsSupported() const
Returns the maximum atomic operation size (in bits) supported by the backend.
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
virtual void emitExpandAtomicLoad(LoadInst *LI) const
Perform an atomic load in a target-specific way.
virtual AtomicExpansionKind shouldExpandAtomicStoreInIR(StoreInst *SI) const
Returns how the given (atomic) store should be expanded by the IR-level AtomicExpand pass.
virtual void emitCmpArithAtomicRMWIntrinsic(AtomicRMWInst *AI) const
Perform an atomicrmw whose result is only used by comparison, using a target-specific intrinsic.
virtual AtomicExpansionKind shouldCastAtomicStoreInIR(StoreInst *SI) const
Returns how the given (atomic) store should be cast by the IR-level AtomicExpand pass.
virtual Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const
virtual AtomicExpansionKind shouldCastAtomicLoadInIR(LoadInst *LI) const
Returns how the given (atomic) load should be cast by the IR-level AtomicExpand pass.
virtual Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const
Inserts in the IR a target-specific intrinsic specifying a fence.
virtual LoadInst * lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const
On some platforms, an AtomicRMW that never actually modifies the value (such as fetch_add of 0) can b...
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
Primary interface to the complete machine description for the target machine.
virtual const TargetSubtargetInfo * getSubtargetImpl(const Function &) const
Virtual method implemented by subclasses that returns a reference to that target's TargetSubtargetInf...
Target-Independent Code Generator Pass Configuration Options.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:290
bool isPointerTy() const
True if this is an instance of PointerType.
Definition Type.h:284
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:201
bool isFloatingPointTy() const
Return true if this is one of the floating-point types.
Definition Type.h:186
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:317
bool isVoidTy() const
Return true if this is 'void'.
Definition Type.h:141
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:259
iterator_range< user_iterator > users()
Definition Value.h:427
bool use_empty() const
Definition Value.h:347
An efficient, type-erasing, non-owning reference to a callable.
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
friend class Instruction
Iterator for Instructions in a `BasicBlock`.
Definition BasicBlock.h:73
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
LLVM_ABI bool canInstructionHaveMMRAs(const Instruction &I)
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
OuterAnalysisManagerProxy< ModuleAnalysisManager, Function > ModuleAnalysisManagerFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
bool isReleaseOrStronger(AtomicOrdering AO)
AtomicOrderingCABI toCABI(AtomicOrdering AO)
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
Value * buildAtomicRMWValue(AtomicRMWInst::BinOp Op, IRBuilderBase &Builder, Value *Loaded, Value *Val)
Emit IR to implement the given atomicrmw operation on values in registers, returning the new value.
AtomicOrdering
Atomic ordering for LLVM's memory model.
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
ArrayRef(const T &OneElt) -> ArrayRef< T >
bool isAcquireOrStronger(AtomicOrdering AO)
constexpr unsigned BitWidth
bool lowerAtomicCmpXchgInst(AtomicCmpXchgInst *CXI)
Convert the given Cmpxchg into primitive load and compare.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
bool lowerAtomicRMWInst(AtomicRMWInst *RMWI)
Convert the given RMWI into primitive load and stores, assuming that doing so is legal.
PointerUnion< const Value *, const PseudoSourceValue * > ValueType
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI FunctionPass * createAtomicExpandLegacyPass()
AtomicExpandPass - At IR level this pass replaces atomic instructions with __atomic_* library calls,...
LLVM_ABI char & AtomicExpandID
AtomicExpandID – Lowers atomic operations in terms of either cmpxchg load-linked/store-conditional lo...
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
Extended Value Type.
Definition ValueTypes.h:35
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
TypeSize getStoreSizeInBits() const
Return the number of bits overwritten by a store of the specified value type.
Definition ValueTypes.h:420
Matching combinators.
static StringRef getLibcallImplName(RTLIB::LibcallImpl CallImpl)
Get the libcall routine name for the specified libcall implementation.