1 //===- AtomicExpandPass.cpp - Expand atomic instructions ------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file contains a pass (at IR level) to replace atomic instructions with
11 // __atomic_* library calls, or target specific instructions which implement the
12 // same semantics in a way which better fits the target backend. This can
13 // include the use of (intrinsic-based) load-linked/store-conditional loops,
14 // AtomicCmpXchg, or type coercions.
15 //
16 //===----------------------------------------------------------------------===//
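//
// Illustrative sketch (not emitted verbatim by this pass): on a target that
// requests CmpXChg expansion, an IR-level
//   %old = atomicrmw add i32* %p, i32 %v seq_cst
// is rewritten into a compare-exchange loop of roughly this shape:
//   loop:
//     %loaded = phi i32 [ %init, %entry ], [ %new_loaded, %loop ]
//     %new = add i32 %loaded, %v
//     %pair = cmpxchg i32* %p, i32 %loaded, i32 %new seq_cst seq_cst
//     %new_loaded = extractvalue { i32, i1 } %pair, 0
//     %success = extractvalue { i32, i1 } %pair, 1
//     br i1 %success, label %atomicrmw.end, label %loop
// See insertRMWCmpXchgLoop below for the exact form that is produced.
//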
17 
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SmallVector.h"
21 #include "llvm/CodeGen/AtomicExpandUtils.h"
22 #include "llvm/CodeGen/RuntimeLibcalls.h"
23 #include "llvm/CodeGen/TargetLowering.h"
24 #include "llvm/CodeGen/TargetPassConfig.h"
25 #include "llvm/CodeGen/TargetSubtargetInfo.h"
26 #include "llvm/CodeGen/ValueTypes.h"
27 #include "llvm/IR/Attributes.h"
28 #include "llvm/IR/BasicBlock.h"
29 #include "llvm/IR/Constant.h"
30 #include "llvm/IR/Constants.h"
31 #include "llvm/IR/DataLayout.h"
32 #include "llvm/IR/DerivedTypes.h"
33 #include "llvm/IR/Function.h"
34 #include "llvm/IR/IRBuilder.h"
35 #include "llvm/IR/InstIterator.h"
36 #include "llvm/IR/Instruction.h"
37 #include "llvm/IR/Instructions.h"
38 #include "llvm/IR/Module.h"
39 #include "llvm/IR/Type.h"
40 #include "llvm/IR/User.h"
41 #include "llvm/IR/Value.h"
42 #include "llvm/Pass.h"
43 #include "llvm/Support/AtomicOrdering.h"
44 #include "llvm/Support/Casting.h"
45 #include "llvm/Support/Debug.h"
46 #include "llvm/Support/ErrorHandling.h"
47 #include "llvm/Support/raw_ostream.h"
48 #include "llvm/Target/TargetMachine.h"
49 #include <cassert>
50 #include <cstdint>
51 #include <iterator>
52 
53 using namespace llvm;
54 
55 #define DEBUG_TYPE "atomic-expand"
56 
57 namespace {
58 
59  class AtomicExpand: public FunctionPass {
60  const TargetLowering *TLI = nullptr;
61 
62  public:
63  static char ID; // Pass identification, replacement for typeid
64 
65  AtomicExpand() : FunctionPass(ID) {
66  initializeAtomicExpandPass(*PassRegistry::getPassRegistry());
67  }
68 
69  bool runOnFunction(Function &F) override;
70 
71  private:
72  bool bracketInstWithFences(Instruction *I, AtomicOrdering Order);
73  IntegerType *getCorrespondingIntegerType(Type *T, const DataLayout &DL);
74  LoadInst *convertAtomicLoadToIntegerType(LoadInst *LI);
75  bool tryExpandAtomicLoad(LoadInst *LI);
76  bool expandAtomicLoadToLL(LoadInst *LI);
77  bool expandAtomicLoadToCmpXchg(LoadInst *LI);
78  StoreInst *convertAtomicStoreToIntegerType(StoreInst *SI);
79  bool expandAtomicStore(StoreInst *SI);
80  bool tryExpandAtomicRMW(AtomicRMWInst *AI);
81  Value *
82  insertRMWLLSCLoop(IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
83  AtomicOrdering MemOpOrder,
84  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
85  void expandAtomicOpToLLSC(
86  Instruction *I, Type *ResultTy, Value *Addr, AtomicOrdering MemOpOrder,
87  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp);
88  void expandPartwordAtomicRMW(
89  AtomicRMWInst *I,
90  TargetLoweringBase::AtomicExpansionKind ExpansionKind);
91  AtomicRMWInst *widenPartwordAtomicRMW(AtomicRMWInst *AI);
92  void expandPartwordCmpXchg(AtomicCmpXchgInst *I);
93  void expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI);
94 
95  AtomicCmpXchgInst *convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI);
96  static Value *insertRMWCmpXchgLoop(
97  IRBuilder<> &Builder, Type *ResultType, Value *Addr,
98  AtomicOrdering MemOpOrder,
99  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
100  CreateCmpXchgInstFun CreateCmpXchg);
101  bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
102 
103  bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
104  bool isIdempotentRMW(AtomicRMWInst *RMWI);
105  bool simplifyIdempotentRMW(AtomicRMWInst *RMWI);
106 
107  bool expandAtomicOpToLibcall(Instruction *I, unsigned Size, unsigned Align,
108  Value *PointerOperand, Value *ValueOperand,
109  Value *CASExpected, AtomicOrdering Ordering,
110  AtomicOrdering Ordering2,
111  ArrayRef<RTLIB::Libcall> Libcalls);
112  void expandAtomicLoadToLibcall(LoadInst *LI);
113  void expandAtomicStoreToLibcall(StoreInst *LI);
114  void expandAtomicRMWToLibcall(AtomicRMWInst *I);
115  void expandAtomicCASToLibcall(AtomicCmpXchgInst *I);
116 
117  friend bool
118  llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
119  CreateCmpXchgInstFun CreateCmpXchg);
120  };
121 
122 } // end anonymous namespace
123 
124 char AtomicExpand::ID = 0;
125 
126 char &llvm::AtomicExpandID = AtomicExpand::ID;
127 
128 INITIALIZE_PASS(AtomicExpand, DEBUG_TYPE, "Expand Atomic instructions",
129  false, false)
130 
131 FunctionPass *llvm::createAtomicExpandPass() { return new AtomicExpand(); }
132 
133 // Helper functions to retrieve the size of atomic instructions.
134 static unsigned getAtomicOpSize(LoadInst *LI) {
135  const DataLayout &DL = LI->getModule()->getDataLayout();
136  return DL.getTypeStoreSize(LI->getType());
137 }
138 
139 static unsigned getAtomicOpSize(StoreInst *SI) {
140  const DataLayout &DL = SI->getModule()->getDataLayout();
141  return DL.getTypeStoreSize(SI->getValueOperand()->getType());
142 }
143 
144 static unsigned getAtomicOpSize(AtomicRMWInst *RMWI) {
145  const DataLayout &DL = RMWI->getModule()->getDataLayout();
146  return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
147 }
148 
149 static unsigned getAtomicOpSize(AtomicCmpXchgInst *CASI) {
150  const DataLayout &DL = CASI->getModule()->getDataLayout();
151  return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
152 }
153 
154 // Helper functions to retrieve the alignment of atomic instructions.
155 static unsigned getAtomicOpAlign(LoadInst *LI) {
156  unsigned Align = LI->getAlignment();
157  // In the future, if this IR restriction is relaxed, we should
158  // return DataLayout::getABITypeAlignment when there's no align
159  // value.
160  assert(Align != 0 && "An atomic LoadInst always has an explicit alignment");
161  return Align;
162 }
163 
164 static unsigned getAtomicOpAlign(StoreInst *SI) {
165  unsigned Align = SI->getAlignment();
166  // In the future, if this IR restriction is relaxed, we should
167  // return DataLayout::getABITypeAlignment when there's no align
168  // value.
169  assert(Align != 0 && "An atomic StoreInst always has an explicit alignment");
170  return Align;
171 }
172 
173 static unsigned getAtomicOpAlign(AtomicRMWInst *RMWI) {
174  // TODO(PR27168): This instruction has no alignment attribute, but unlike the
175  // default alignment for load/store, the default here is to assume
176  // it has NATURAL alignment, not DataLayout-specified alignment.
177  const DataLayout &DL = RMWI->getModule()->getDataLayout();
178  return DL.getTypeStoreSize(RMWI->getValOperand()->getType());
179 }
180 
181 static unsigned getAtomicOpAlign(AtomicCmpXchgInst *CASI) {
182  // TODO(PR27168): same comment as above.
183  const DataLayout &DL = CASI->getModule()->getDataLayout();
184  return DL.getTypeStoreSize(CASI->getCompareOperand()->getType());
185 }
186 
187 // Determine if a particular atomic operation has a supported size,
188 // and is of appropriate alignment, to be passed through for target
189 // lowering. (Versus turning into a __atomic libcall)
190 template <typename Inst>
191 static bool atomicSizeSupported(const TargetLowering *TLI, Inst *I) {
192  unsigned Size = getAtomicOpSize(I);
193  unsigned Align = getAtomicOpAlign(I);
194  return Align >= Size && Size <= TLI->getMaxAtomicSizeInBitsSupported() / 8;
195 }
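// Illustrative example (assumed target properties, not code from this file):
// with getMaxAtomicSizeInBitsSupported() == 64, an atomic i64 load with
// "align 8" passes this check and is handed to target lowering, while the
// same load with "align 4" fails the Align >= Size test and is turned into an
// __atomic_* library call by the code below.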
196 
197 bool AtomicExpand::runOnFunction(Function &F) {
198  auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
199  if (!TPC)
200  return false;
201 
202  auto &TM = TPC->getTM<TargetMachine>();
203  if (!TM.getSubtargetImpl(F)->enableAtomicExpand())
204  return false;
205  TLI = TM.getSubtargetImpl(F)->getTargetLowering();
206 
207  SmallVector<Instruction *, 1> AtomicInsts;
208 
209  // Changing control-flow while iterating through it is a bad idea, so gather a
210  // list of all atomic instructions before we start.
211  for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
212  Instruction *I = &*II;
213  if (I->isAtomic() && !isa<FenceInst>(I))
214  AtomicInsts.push_back(I);
215  }
216 
217  bool MadeChange = false;
218  for (auto I : AtomicInsts) {
219  auto LI = dyn_cast<LoadInst>(I);
220  auto SI = dyn_cast<StoreInst>(I);
221  auto RMWI = dyn_cast<AtomicRMWInst>(I);
222  auto CASI = dyn_cast<AtomicCmpXchgInst>(I);
223  assert((LI || SI || RMWI || CASI) && "Unknown atomic instruction");
224 
225  // If the Size/Alignment is not supported, replace with a libcall.
226  if (LI) {
227  if (!atomicSizeSupported(TLI, LI)) {
228  expandAtomicLoadToLibcall(LI);
229  MadeChange = true;
230  continue;
231  }
232  } else if (SI) {
233  if (!atomicSizeSupported(TLI, SI)) {
234  expandAtomicStoreToLibcall(SI);
235  MadeChange = true;
236  continue;
237  }
238  } else if (RMWI) {
239  if (!atomicSizeSupported(TLI, RMWI)) {
240  expandAtomicRMWToLibcall(RMWI);
241  MadeChange = true;
242  continue;
243  }
244  } else if (CASI) {
245  if (!atomicSizeSupported(TLI, CASI)) {
246  expandAtomicCASToLibcall(CASI);
247  MadeChange = true;
248  continue;
249  }
250  }
251 
252  if (TLI->shouldInsertFencesForAtomic(I)) {
253  auto FenceOrdering = AtomicOrdering::Monotonic;
254  if (LI && isAcquireOrStronger(LI->getOrdering())) {
255  FenceOrdering = LI->getOrdering();
256  LI->setOrdering(AtomicOrdering::Monotonic);
257  } else if (SI && isReleaseOrStronger(SI->getOrdering())) {
258  FenceOrdering = SI->getOrdering();
259  SI->setOrdering(AtomicOrdering::Monotonic);
260  } else if (RMWI && (isReleaseOrStronger(RMWI->getOrdering()) ||
261  isAcquireOrStronger(RMWI->getOrdering()))) {
262  FenceOrdering = RMWI->getOrdering();
263  RMWI->setOrdering(AtomicOrdering::Monotonic);
264  } else if (CASI &&
265  TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
266  TargetLoweringBase::AtomicExpansionKind::None &&
267  (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
268  isAcquireOrStronger(CASI->getSuccessOrdering()))) {
269  // If a compare and swap is lowered to LL/SC, we can do smarter fence
270  // insertion, with a stronger one on the success path than on the
271  // failure path. As a result, fence insertion is directly done by
272  // expandAtomicCmpXchg in that case.
273  FenceOrdering = CASI->getSuccessOrdering();
274  CASI->setSuccessOrdering(AtomicOrdering::Monotonic);
275  CASI->setFailureOrdering(AtomicOrdering::Monotonic);
276  }
277 
278  if (FenceOrdering != AtomicOrdering::Monotonic) {
279  MadeChange |= bracketInstWithFences(I, FenceOrdering);
280  }
281  }
282 
283  if (LI) {
284  if (LI->getType()->isFloatingPointTy()) {
285  // TODO: add a TLI hook to control this so that each target can
286  // convert to lowering the original type one at a time.
287  LI = convertAtomicLoadToIntegerType(LI);
288  assert(LI->getType()->isIntegerTy() && "invariant broken");
289  MadeChange = true;
290  }
291 
292  MadeChange |= tryExpandAtomicLoad(LI);
293  } else if (SI) {
294  if (SI->getValueOperand()->getType()->isFloatingPointTy()) {
295  // TODO: add a TLI hook to control this so that each target can
296  // convert to lowering the original type one at a time.
297  SI = convertAtomicStoreToIntegerType(SI);
298  assert(SI->getValueOperand()->getType()->isIntegerTy() &&
299  "invariant broken");
300  MadeChange = true;
301  }
302 
303  if (TLI->shouldExpandAtomicStoreInIR(SI))
304  MadeChange |= expandAtomicStore(SI);
305  } else if (RMWI) {
306  // There are two different ways of expanding RMW instructions:
307  // - into a load if it is idempotent
308  // - into a Cmpxchg/LL-SC loop otherwise
309  // we try them in that order.
310 
311  if (isIdempotentRMW(RMWI) && simplifyIdempotentRMW(RMWI)) {
312  MadeChange = true;
313  } else {
314  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
315  unsigned ValueSize = getAtomicOpSize(RMWI);
316  AtomicRMWInst::BinOp Op = RMWI->getOperation();
317  if (ValueSize < MinCASSize &&
318  (Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
319  Op == AtomicRMWInst::And)) {
320  RMWI = widenPartwordAtomicRMW(RMWI);
321  MadeChange = true;
322  }
323 
324  MadeChange |= tryExpandAtomicRMW(RMWI);
325  }
326  } else if (CASI) {
327  // TODO: when we're ready to make the change at the IR level, we can
328  // extend convertCmpXchgToInteger for floating point too.
329  assert(!CASI->getCompareOperand()->getType()->isFloatingPointTy() &&
330  "unimplemented - floating point not legal at IR level");
331  if (CASI->getCompareOperand()->getType()->isPointerTy() ) {
332  // TODO: add a TLI hook to control this so that each target can
333  // convert to lowering the original type one at a time.
334  CASI = convertCmpXchgToIntegerType(CASI);
335  assert(CASI->getCompareOperand()->getType()->isIntegerTy() &&
336  "invariant broken");
337  MadeChange = true;
338  }
339 
340  MadeChange |= tryExpandAtomicCmpXchg(CASI);
341  }
342  }
343  return MadeChange;
344 }
345 
346 bool AtomicExpand::bracketInstWithFences(Instruction *I, AtomicOrdering Order) {
347  IRBuilder<> Builder(I);
348 
349  auto LeadingFence = TLI->emitLeadingFence(Builder, I, Order);
350 
351  auto TrailingFence = TLI->emitTrailingFence(Builder, I, Order);
352  // We have a guard here because not every atomic operation generates a
353  // trailing fence.
354  if (TrailingFence)
355  TrailingFence->moveAfter(I);
356 
357  return (LeadingFence || TrailingFence);
358 }
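// Illustrative sketch (the exact fences are target hooks, so this assumes
// plain fence instructions): an atomicrmw whose ordering was downgraded to
// monotonic in runOnFunction may end up bracketed roughly as
//   fence seq_cst
//   %old = atomicrmw add i32* %p, i32 %v monotonic
//   fence seq_cst
// where the trailing fence can legitimately be absent, hence the guard above.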
359 
360 /// Get the iX type with the same bitwidth as T.
361 IntegerType *AtomicExpand::getCorrespondingIntegerType(Type *T,
362  const DataLayout &DL) {
363  EVT VT = TLI->getValueType(DL, T);
364  unsigned BitWidth = VT.getStoreSizeInBits();
365  assert(BitWidth == VT.getSizeInBits() && "must be a power of two");
366  return IntegerType::get(T->getContext(), BitWidth);
367 }
368 
369 /// Convert an atomic load of a non-integral type to an integer load of the
370 /// equivalent bitwidth. See the function comment on
371 /// convertAtomicStoreToIntegerType for background.
372 LoadInst *AtomicExpand::convertAtomicLoadToIntegerType(LoadInst *LI) {
373  auto *M = LI->getModule();
374  Type *NewTy = getCorrespondingIntegerType(LI->getType(),
375  M->getDataLayout());
376 
377  IRBuilder<> Builder(LI);
378 
379  Value *Addr = LI->getPointerOperand();
380  Type *PT = PointerType::get(NewTy,
381  Addr->getType()->getPointerAddressSpace());
382  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
383 
384  auto *NewLI = Builder.CreateLoad(NewAddr);
385  NewLI->setAlignment(LI->getAlignment());
386  NewLI->setVolatile(LI->isVolatile());
387  NewLI->setAtomic(LI->getOrdering(), LI->getSyncScopeID());
388  LLVM_DEBUG(dbgs() << "Replaced " << *LI << " with " << *NewLI << "\n");
389 
390  Value *NewVal = Builder.CreateBitCast(NewLI, LI->getType());
391  LI->replaceAllUsesWith(NewVal);
392  LI->eraseFromParent();
393  return NewLI;
394 }
395 
396 bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
397  switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
398  case TargetLoweringBase::AtomicExpansionKind::None:
399  return false;
400  case TargetLoweringBase::AtomicExpansionKind::LLSC:
401  expandAtomicOpToLLSC(
402  LI, LI->getType(), LI->getPointerOperand(), LI->getOrdering(),
403  [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; });
404  return true;
405  case TargetLoweringBase::AtomicExpansionKind::LLOnly:
406  return expandAtomicLoadToLL(LI);
407  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
408  return expandAtomicLoadToCmpXchg(LI);
409  default:
410  llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
411  }
412 }
413 
414 bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
415  IRBuilder<> Builder(LI);
416 
417  // On some architectures, load-linked instructions are atomic for larger
418  // sizes than normal loads. For example, the only 64-bit load guaranteed
419  // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
420  Value *Val =
421  TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering());
422  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
423 
424  LI->replaceAllUsesWith(Val);
425  LI->eraseFromParent();
426 
427  return true;
428 }
429 
430 bool AtomicExpand::expandAtomicLoadToCmpXchg(LoadInst *LI) {
431  IRBuilder<> Builder(LI);
432  AtomicOrdering Order = LI->getOrdering();
433  Value *Addr = LI->getPointerOperand();
434  Type *Ty = cast<PointerType>(Addr->getType())->getElementType();
435  Constant *DummyVal = Constant::getNullValue(Ty);
436 
437  Value *Pair = Builder.CreateAtomicCmpXchg(
438  Addr, DummyVal, DummyVal, Order,
439  AtomicCmpXchgInst::getStrongestFailureOrdering(Order));
440  Value *Loaded = Builder.CreateExtractValue(Pair, 0, "loaded");
441 
442  LI->replaceAllUsesWith(Loaded);
443  LI->eraseFromParent();
444 
445  return true;
446 }
447 
448 /// Convert an atomic store of a non-integral type to an integer store of the
449 /// equivalent bitwidth. We used to not support floating point or vector
450 /// atomics in the IR at all. The backends learned to deal with the bitcast
451 /// idiom because that was the only way of expressing the notion of an atomic
452 /// float or vector store. The long term plan is to teach each backend to
453 /// instruction select from the original atomic store, but as a migration
454 /// mechanism, we convert back to the old format which the backends understand.
455 /// Each backend will need individual work to recognize the new format.
456 StoreInst *AtomicExpand::convertAtomicStoreToIntegerType(StoreInst *SI) {
457  IRBuilder<> Builder(SI);
458  auto *M = SI->getModule();
459  Type *NewTy = getCorrespondingIntegerType(SI->getValueOperand()->getType(),
460  M->getDataLayout());
461  Value *NewVal = Builder.CreateBitCast(SI->getValueOperand(), NewTy);
462 
463  Value *Addr = SI->getPointerOperand();
464  Type *PT = PointerType::get(NewTy,
465  Addr->getType()->getPointerAddressSpace());
466  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
467 
468  StoreInst *NewSI = Builder.CreateStore(NewVal, NewAddr);
469  NewSI->setAlignment(SI->getAlignment());
470  NewSI->setVolatile(SI->isVolatile());
471  NewSI->setAtomic(SI->getOrdering(), SI->getSyncScopeID());
472  LLVM_DEBUG(dbgs() << "Replaced " << *SI << " with " << *NewSI << "\n");
473  SI->eraseFromParent();
474  return NewSI;
475 }
476 
477 bool AtomicExpand::expandAtomicStore(StoreInst *SI) {
478  // This function is only called on atomic stores that are too large to be
479  // atomic if implemented as a native store. So we replace them by an
480  // atomic swap, that can be implemented for example as a ldrex/strex on ARM
481  // or lock cmpxchg8/16b on X86, as these are atomic for larger sizes.
482  // It is the responsibility of the target to only signal expansion via
483  // shouldExpandAtomicRMW in cases where this is required and possible.
484  IRBuilder<> Builder(SI);
485  AtomicRMWInst *AI =
486  Builder.CreateAtomicRMW(AtomicRMWInst::Xchg, SI->getPointerOperand(),
487  SI->getValueOperand(), SI->getOrdering());
488  SI->eraseFromParent();
489 
490  // Now we have an appropriate swap instruction, lower it as usual.
491  return tryExpandAtomicRMW(AI);
492 }
493 
494 static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
495  Value *Loaded, Value *NewVal,
496  AtomicOrdering MemOpOrder,
497  Value *&Success, Value *&NewLoaded) {
498  Value* Pair = Builder.CreateAtomicCmpXchg(
499  Addr, Loaded, NewVal, MemOpOrder,
500  AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
501  Success = Builder.CreateExtractValue(Pair, 1, "success");
502  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
503 }
504 
505 /// Emit IR to implement the given atomicrmw operation on values in registers,
506 /// returning the new value.
507 static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
508  Value *Loaded, Value *Inc) {
509  Value *NewVal;
510  switch (Op) {
511  case AtomicRMWInst::Xchg:
512  return Inc;
513  case AtomicRMWInst::Add:
514  return Builder.CreateAdd(Loaded, Inc, "new");
515  case AtomicRMWInst::Sub:
516  return Builder.CreateSub(Loaded, Inc, "new");
517  case AtomicRMWInst::And:
518  return Builder.CreateAnd(Loaded, Inc, "new");
519  case AtomicRMWInst::Nand:
520  return Builder.CreateNot(Builder.CreateAnd(Loaded, Inc), "new");
521  case AtomicRMWInst::Or:
522  return Builder.CreateOr(Loaded, Inc, "new");
523  case AtomicRMWInst::Xor:
524  return Builder.CreateXor(Loaded, Inc, "new");
525  case AtomicRMWInst::Max:
526  NewVal = Builder.CreateICmpSGT(Loaded, Inc);
527  return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
528  case AtomicRMWInst::Min:
529  NewVal = Builder.CreateICmpSLE(Loaded, Inc);
530  return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
531  case AtomicRMWInst::UMax:
532  NewVal = Builder.CreateICmpUGT(Loaded, Inc);
533  return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
534  case AtomicRMWInst::UMin:
535  NewVal = Builder.CreateICmpULE(Loaded, Inc);
536  return Builder.CreateSelect(NewVal, Loaded, Inc, "new");
537  default:
538  llvm_unreachable("Unknown atomic op");
539  }
540 }
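// For example, performAtomicOp(AtomicRMWInst::Max, ...) produces the register
// form of a signed maximum, roughly
//   %cmp = icmp sgt i32 %loaded, %inc
//   %new = select i1 %cmp, i32 %loaded, i32 %inc
// which the enclosing LL/SC or cmpxchg loop then attempts to store back.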
541 
542 bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
543  switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
544  case TargetLoweringBase::AtomicExpansionKind::None:
545  return false;
546  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
547  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
548  unsigned ValueSize = getAtomicOpSize(AI);
549  if (ValueSize < MinCASSize) {
550  llvm_unreachable(
551  "MinCmpXchgSizeInBits not yet supported for LL/SC architectures.");
552  } else {
553  auto PerformOp = [&](IRBuilder<> &Builder, Value *Loaded) {
554  return performAtomicOp(AI->getOperation(), Builder, Loaded,
555  AI->getValOperand());
556  };
557  expandAtomicOpToLLSC(AI, AI->getType(), AI->getPointerOperand(),
558  AI->getOrdering(), PerformOp);
559  }
560  return true;
561  }
562  case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
563  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
564  unsigned ValueSize = getAtomicOpSize(AI);
565  if (ValueSize < MinCASSize) {
566  expandPartwordAtomicRMW(AI,
567  TargetLoweringBase::AtomicExpansionKind::CmpXChg);
568  } else {
569  expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
570  }
571  return true;
572  }
573  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic: {
574  expandAtomicRMWToMaskedIntrinsic(AI);
575  return true;
576  }
577  default:
578  llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
579  }
580 }
581 
582 namespace {
583 
584 /// Result values from createMaskInstrs helper.
585 struct PartwordMaskValues {
586  Type *WordType;
587  Type *ValueType;
588  Value *AlignedAddr;
589  Value *ShiftAmt;
590  Value *Mask;
591  Value *Inv_Mask;
592 };
593 
594 } // end anonymous namespace
595 
596 /// This is a helper function which builds instructions to provide
597 /// values necessary for partword atomic operations. It takes an
598 /// incoming address, Addr, and ValueType, and constructs the address,
599 /// shift-amounts and masks needed to work with a larger value of size
600 /// WordSize.
601 ///
602 /// AlignedAddr: Addr rounded down to a multiple of WordSize
603 ///
604 /// ShiftAmt: Number of bits to right-shift a WordSize value loaded
605 /// from AlignAddr for it to have the same value as if
606 /// ValueType was loaded from Addr.
607 ///
608 /// Mask: Value to mask with the value loaded from AlignAddr to
609 /// include only the part that would've been loaded from Addr.
610 ///
611 /// Inv_Mask: The inverse of Mask.
612 static PartwordMaskValues createMaskInstrs(IRBuilder<> &Builder, Instruction *I,
613  Type *ValueType, Value *Addr,
614  unsigned WordSize) {
615  PartwordMaskValues Ret;
616 
617  BasicBlock *BB = I->getParent();
618  Function *F = BB->getParent();
619  Module *M = I->getModule();
620 
621  LLVMContext &Ctx = F->getContext();
622  const DataLayout &DL = M->getDataLayout();
623 
624  unsigned ValueSize = DL.getTypeStoreSize(ValueType);
625 
626  assert(ValueSize < WordSize);
627 
628  Ret.ValueType = ValueType;
629  Ret.WordType = Type::getIntNTy(Ctx, WordSize * 8);
630 
631  Type *WordPtrType =
632  Ret.WordType->getPointerTo(Addr->getType()->getPointerAddressSpace());
633 
634  Value *AddrInt = Builder.CreatePtrToInt(Addr, DL.getIntPtrType(Ctx));
635  Ret.AlignedAddr = Builder.CreateIntToPtr(
636  Builder.CreateAnd(AddrInt, ~(uint64_t)(WordSize - 1)), WordPtrType,
637  "AlignedAddr");
638 
639  Value *PtrLSB = Builder.CreateAnd(AddrInt, WordSize - 1, "PtrLSB");
640  if (DL.isLittleEndian()) {
641  // turn bytes into bits
642  Ret.ShiftAmt = Builder.CreateShl(PtrLSB, 3);
643  } else {
644  // turn bytes into bits, and count from the other side.
645  Ret.ShiftAmt =
646  Builder.CreateShl(Builder.CreateXor(PtrLSB, WordSize - ValueSize), 3);
647  }
648 
649  Ret.ShiftAmt = Builder.CreateTrunc(Ret.ShiftAmt, Ret.WordType, "ShiftAmt");
650  Ret.Mask = Builder.CreateShl(
651  ConstantInt::get(Ret.WordType, (1 << ValueSize * 8) - 1), Ret.ShiftAmt,
652  "Mask");
653  Ret.Inv_Mask = Builder.CreateNot(Ret.Mask, "Inv_Mask");
654 
655  return Ret;
656 }
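// Worked example (illustrative): for an i8 operation on an address congruent
// to 3 modulo a 4-byte word size, the values built above are
//   AlignedAddr = Addr & ~3
//   ShiftAmt    = 24 on little-endian (byte 3 is the word's most significant
//                 byte), 0 on big-endian
//   Mask        = 0xFF << ShiftAmt,  Inv_Mask = ~Mask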
657 
658 /// Emit IR to implement a masked version of a given atomicrmw
659 /// operation. (That is, only the bits under the Mask should be
660 /// affected by the operation)
661 static Value *performMaskedAtomicOp(AtomicRMWInst::BinOp Op,
662  IRBuilder<> &Builder, Value *Loaded,
663  Value *Shifted_Inc, Value *Inc,
664  const PartwordMaskValues &PMV) {
665  // TODO: update to use
666  // https://graphics.stanford.edu/~seander/bithacks.html#MaskedMerge in order
667  // to merge bits from two values without requiring PMV.Inv_Mask.
668  switch (Op) {
669  case AtomicRMWInst::Xchg: {
670  Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
671  Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, Shifted_Inc);
672  return FinalVal;
673  }
674  case AtomicRMWInst::Or:
675  case AtomicRMWInst::Xor:
676  case AtomicRMWInst::And:
677  llvm_unreachable("Or/Xor/And handled by widenPartwordAtomicRMW");
678  case AtomicRMWInst::Add:
679  case AtomicRMWInst::Sub:
680  case AtomicRMWInst::Nand: {
681  // The other arithmetic ops need to be masked into place.
682  Value *NewVal = performAtomicOp(Op, Builder, Loaded, Shifted_Inc);
683  Value *NewVal_Masked = Builder.CreateAnd(NewVal, PMV.Mask);
684  Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
685  Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Masked);
686  return FinalVal;
687  }
688  case AtomicRMWInst::Max:
689  case AtomicRMWInst::Min:
690  case AtomicRMWInst::UMax:
691  case AtomicRMWInst::UMin: {
692  // Finally, comparison ops will operate on the full value, so
693  // truncate down to the original size, and expand out again after
694  // doing the operation.
695  Value *Loaded_Shiftdown = Builder.CreateTrunc(
696  Builder.CreateLShr(Loaded, PMV.ShiftAmt), PMV.ValueType);
697  Value *NewVal = performAtomicOp(Op, Builder, Loaded_Shiftdown, Inc);
698  Value *NewVal_Shiftup = Builder.CreateShl(
699  Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
700  Value *Loaded_MaskOut = Builder.CreateAnd(Loaded, PMV.Inv_Mask);
701  Value *FinalVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shiftup);
702  return FinalVal;
703  }
704  default:
705  llvm_unreachable("Unknown atomic op");
706  }
707 }
708 
709 /// Expand a sub-word atomicrmw operation into an appropriate
710 /// word-sized operation.
711 ///
712 /// It will create an LL/SC or cmpxchg loop, as appropriate, the same
713 /// way as a typical atomicrmw expansion. The only difference here is
714 /// that the operation inside of the loop must operate only upon a
715 /// part of the value.
716 void AtomicExpand::expandPartwordAtomicRMW(
717  AtomicRMWInst *AI, TargetLoweringBase::AtomicExpansionKind ExpansionKind) {
718  assert(ExpansionKind == TargetLoweringBase::AtomicExpansionKind::CmpXChg);
719 
720  AtomicOrdering MemOpOrder = AI->getOrdering();
721 
722  IRBuilder<> Builder(AI);
723 
724  PartwordMaskValues PMV =
725  createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
726  TLI->getMinCmpXchgSizeInBits() / 8);
727 
728  Value *ValOperand_Shifted =
729  Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
730  PMV.ShiftAmt, "ValOperand_Shifted");
731 
732  auto PerformPartwordOp = [&](IRBuilder<> &Builder, Value *Loaded) {
733  return performMaskedAtomicOp(AI->getOperation(), Builder, Loaded,
734  ValOperand_Shifted, AI->getValOperand(), PMV);
735  };
736 
737  // TODO: When we're ready to support LLSC conversions too, use
738  // insertRMWLLSCLoop here for ExpansionKind==LLSC.
739  Value *OldResult =
740  insertRMWCmpXchgLoop(Builder, PMV.WordType, PMV.AlignedAddr, MemOpOrder,
741  PerformPartwordOp, createCmpXchgInstFun);
742  Value *FinalOldResult = Builder.CreateTrunc(
743  Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
744  AI->replaceAllUsesWith(FinalOldResult);
745  AI->eraseFromParent();
746 }
747 
748 // Widen the bitwise atomicrmw (or/xor/and) to the minimum supported width.
749 AtomicRMWInst *AtomicExpand::widenPartwordAtomicRMW(AtomicRMWInst *AI) {
750  IRBuilder<> Builder(AI);
751  AtomicRMWInst::BinOp Op = AI->getOperation();
752 
753  assert((Op == AtomicRMWInst::Or || Op == AtomicRMWInst::Xor ||
754  Op == AtomicRMWInst::And) &&
755  "Unable to widen operation");
756 
757  PartwordMaskValues PMV =
758  createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
759  TLI->getMinCmpXchgSizeInBits() / 8);
760 
761  Value *ValOperand_Shifted =
762  Builder.CreateShl(Builder.CreateZExt(AI->getValOperand(), PMV.WordType),
763  PMV.ShiftAmt, "ValOperand_Shifted");
764 
765  Value *NewOperand;
766 
767  if (Op == AtomicRMWInst::And)
768  NewOperand =
769  Builder.CreateOr(PMV.Inv_Mask, ValOperand_Shifted, "AndOperand");
770  else
771  NewOperand = ValOperand_Shifted;
772 
773  AtomicRMWInst *NewAI = Builder.CreateAtomicRMW(Op, PMV.AlignedAddr,
774  NewOperand, AI->getOrdering());
775 
776  Value *FinalOldResult = Builder.CreateTrunc(
777  Builder.CreateLShr(NewAI, PMV.ShiftAmt), PMV.ValueType);
778  AI->replaceAllUsesWith(FinalOldResult);
779  AI->eraseFromParent();
780  return NewAI;
781 }
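// Example (illustrative): "atomicrmw and i8* %p, i8 %v" widened to a 4-byte
// word becomes "atomicrmw and i32" on AlignedAddr with the operand
// (zext %v << ShiftAmt) | Inv_Mask, so the bytes outside the i8 are ANDed
// with all-ones and left untouched. For or/xor the shifted value alone is
// enough, since or/xor with zero bits is a no-op.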
782 
783 void AtomicExpand::expandPartwordCmpXchg(AtomicCmpXchgInst *CI) {
784  // The basic idea here is that we're expanding a cmpxchg of a
785  // smaller memory size up to a word-sized cmpxchg. To do this, we
786  // need to add a retry-loop for strong cmpxchg, so that
787  // modifications to other parts of the word don't cause a spurious
788  // failure.
789 
790  // This generates code like the following:
791  // [[Setup mask values PMV.*]]
792  // %NewVal_Shifted = shl i32 %NewVal, %PMV.ShiftAmt
793  // %Cmp_Shifted = shl i32 %Cmp, %PMV.ShiftAmt
794  // %InitLoaded = load i32* %addr
795  // %InitLoaded_MaskOut = and i32 %InitLoaded, %PMV.Inv_Mask
796  // br partword.cmpxchg.loop
797  // partword.cmpxchg.loop:
798  // %Loaded_MaskOut = phi i32 [ %InitLoaded_MaskOut, %entry ],
799  // [ %OldVal_MaskOut, %partword.cmpxchg.failure ]
800  // %FullWord_NewVal = or i32 %Loaded_MaskOut, %NewVal_Shifted
801  // %FullWord_Cmp = or i32 %Loaded_MaskOut, %Cmp_Shifted
802  // %NewCI = cmpxchg i32* %PMV.AlignedAddr, i32 %FullWord_Cmp,
803  // i32 %FullWord_NewVal success_ordering failure_ordering
804  // %OldVal = extractvalue { i32, i1 } %NewCI, 0
805  // %Success = extractvalue { i32, i1 } %NewCI, 1
806  // br i1 %Success, label %partword.cmpxchg.end,
807  // label %partword.cmpxchg.failure
808  // partword.cmpxchg.failure:
809  // %OldVal_MaskOut = and i32 %OldVal, %PMV.Inv_Mask
810  // %ShouldContinue = icmp ne i32 %Loaded_MaskOut, %OldVal_MaskOut
811  // br i1 %ShouldContinue, label %partword.cmpxchg.loop,
812  // label %partword.cmpxchg.end
813  // partword.cmpxchg.end:
814  // %tmp1 = lshr i32 %OldVal, %PMV.ShiftAmt
815  // %FinalOldVal = trunc i32 %tmp1 to i8
816  // %tmp2 = insertvalue { i8, i1 } undef, i8 %FinalOldVal, 0
817  // %Res = insertvalue { i8, i1 } %tmp2, i1 %Success, 1
818 
819  Value *Addr = CI->getPointerOperand();
820  Value *Cmp = CI->getCompareOperand();
821  Value *NewVal = CI->getNewValOperand();
822 
823  BasicBlock *BB = CI->getParent();
824  Function *F = BB->getParent();
825  IRBuilder<> Builder(CI);
826  LLVMContext &Ctx = Builder.getContext();
827 
828  const int WordSize = TLI->getMinCmpXchgSizeInBits() / 8;
829 
830  BasicBlock *EndBB =
831  BB->splitBasicBlock(CI->getIterator(), "partword.cmpxchg.end");
832  auto FailureBB =
833  BasicBlock::Create(Ctx, "partword.cmpxchg.failure", F, EndBB);
834  auto LoopBB = BasicBlock::Create(Ctx, "partword.cmpxchg.loop", F, FailureBB);
835 
836  // The split call above "helpfully" added a branch at the end of BB
837  // (to the wrong place).
838  std::prev(BB->end())->eraseFromParent();
839  Builder.SetInsertPoint(BB);
840 
841  PartwordMaskValues PMV = createMaskInstrs(
842  Builder, CI, CI->getCompareOperand()->getType(), Addr, WordSize);
843 
844  // Shift the incoming values over, into the right location in the word.
845  Value *NewVal_Shifted =
846  Builder.CreateShl(Builder.CreateZExt(NewVal, PMV.WordType), PMV.ShiftAmt);
847  Value *Cmp_Shifted =
848  Builder.CreateShl(Builder.CreateZExt(Cmp, PMV.WordType), PMV.ShiftAmt);
849 
850  // Load the entire current word, and mask into place the expected and new
851  // values
852  LoadInst *InitLoaded = Builder.CreateLoad(PMV.WordType, PMV.AlignedAddr);
853  InitLoaded->setVolatile(CI->isVolatile());
854  Value *InitLoaded_MaskOut = Builder.CreateAnd(InitLoaded, PMV.Inv_Mask);
855  Builder.CreateBr(LoopBB);
856 
857  // partword.cmpxchg.loop:
858  Builder.SetInsertPoint(LoopBB);
859  PHINode *Loaded_MaskOut = Builder.CreatePHI(PMV.WordType, 2);
860  Loaded_MaskOut->addIncoming(InitLoaded_MaskOut, BB);
861 
862  // Mask/Or the expected and new values into place in the loaded word.
863  Value *FullWord_NewVal = Builder.CreateOr(Loaded_MaskOut, NewVal_Shifted);
864  Value *FullWord_Cmp = Builder.CreateOr(Loaded_MaskOut, Cmp_Shifted);
865  AtomicCmpXchgInst *NewCI = Builder.CreateAtomicCmpXchg(
866  PMV.AlignedAddr, FullWord_Cmp, FullWord_NewVal, CI->getSuccessOrdering(),
867  CI->getFailureOrdering(), CI->getSyncScopeID());
868  NewCI->setVolatile(CI->isVolatile());
869  // When we're building a strong cmpxchg, we need a loop, so you
870  // might think we could use a weak cmpxchg inside. But, using strong
871  // allows the below comparison for ShouldContinue, and we're
872  // expecting the underlying cmpxchg to be a machine instruction,
873  // which is strong anyways.
874  NewCI->setWeak(CI->isWeak());
875 
876  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
877  Value *Success = Builder.CreateExtractValue(NewCI, 1);
878 
879  if (CI->isWeak())
880  Builder.CreateBr(EndBB);
881  else
882  Builder.CreateCondBr(Success, EndBB, FailureBB);
883 
884  // partword.cmpxchg.failure:
885  Builder.SetInsertPoint(FailureBB);
886  // Upon failure, check whether the masked-out part of the loaded value
887  // changed. If it did not, the failure came from the masked-in part, so
888  // give up; otherwise the failure was spurious and we retry the loop.
889  Value *OldVal_MaskOut = Builder.CreateAnd(OldVal, PMV.Inv_Mask);
890  Value *ShouldContinue = Builder.CreateICmpNE(Loaded_MaskOut, OldVal_MaskOut);
891  Builder.CreateCondBr(ShouldContinue, LoopBB, EndBB);
892 
893  // Add the second value to the phi from above
894  Loaded_MaskOut->addIncoming(OldVal_MaskOut, FailureBB);
895 
896  // partword.cmpxchg.end:
897  Builder.SetInsertPoint(CI);
898 
899  Value *FinalOldVal = Builder.CreateTrunc(
900  Builder.CreateLShr(OldVal, PMV.ShiftAmt), PMV.ValueType);
901  Value *Res = UndefValue::get(CI->getType());
902  Res = Builder.CreateInsertValue(Res, FinalOldVal, 0);
903  Res = Builder.CreateInsertValue(Res, Success, 1);
904 
905  CI->replaceAllUsesWith(Res);
906  CI->eraseFromParent();
907 }
908 
909 void AtomicExpand::expandAtomicOpToLLSC(
910  Instruction *I, Type *ResultType, Value *Addr, AtomicOrdering MemOpOrder,
911  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
912  IRBuilder<> Builder(I);
913  Value *Loaded =
914  insertRMWLLSCLoop(Builder, ResultType, Addr, MemOpOrder, PerformOp);
915 
916  I->replaceAllUsesWith(Loaded);
917  I->eraseFromParent();
918 }
919 
920 void AtomicExpand::expandAtomicRMWToMaskedIntrinsic(AtomicRMWInst *AI) {
921  IRBuilder<> Builder(AI);
922 
923  PartwordMaskValues PMV =
924  createMaskInstrs(Builder, AI, AI->getType(), AI->getPointerOperand(),
925  TLI->getMinCmpXchgSizeInBits() / 8);
926 
927  // The value operand must be sign-extended for signed min/max so that the
928  // target's signed comparison instructions can be used. Otherwise, just
929  // zero-ext.
930  Instruction::CastOps CastOp = Instruction::ZExt;
931  AtomicRMWInst::BinOp RMWOp = AI->getOperation();
932  if (RMWOp == AtomicRMWInst::Max || RMWOp == AtomicRMWInst::Min)
933  CastOp = Instruction::SExt;
934 
935  Value *ValOperand_Shifted = Builder.CreateShl(
936  Builder.CreateCast(CastOp, AI->getValOperand(), PMV.WordType),
937  PMV.ShiftAmt, "ValOperand_Shifted");
938  Value *OldResult = TLI->emitMaskedAtomicRMWIntrinsic(
939  Builder, AI, PMV.AlignedAddr, ValOperand_Shifted, PMV.Mask, PMV.ShiftAmt,
940  AI->getOrdering());
941  Value *FinalOldResult = Builder.CreateTrunc(
942  Builder.CreateLShr(OldResult, PMV.ShiftAmt), PMV.ValueType);
943  AI->replaceAllUsesWith(FinalOldResult);
944  AI->eraseFromParent();
945 }
946 
947 Value *AtomicExpand::insertRMWLLSCLoop(
948  IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
949  AtomicOrdering MemOpOrder,
950  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp) {
951  LLVMContext &Ctx = Builder.getContext();
952  BasicBlock *BB = Builder.GetInsertBlock();
953  Function *F = BB->getParent();
954 
955  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
956  //
957  // The standard expansion we produce is:
958  // [...]
959  // atomicrmw.start:
960  // %loaded = @load.linked(%addr)
961  // %new = some_op iN %loaded, %incr
962  // %stored = @store_conditional(%new, %addr)
963  // %try_again = icmp i32 ne %stored, 0
964  // br i1 %try_again, label %loop, label %atomicrmw.end
965  // atomicrmw.end:
966  // [...]
967  BasicBlock *ExitBB =
968  BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
969  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
970 
971  // The split call above "helpfully" added a branch at the end of BB (to the
972  // wrong place).
973  std::prev(BB->end())->eraseFromParent();
974  Builder.SetInsertPoint(BB);
975  Builder.CreateBr(LoopBB);
976 
977  // Start the main loop block now that we've taken care of the preliminaries.
978  Builder.SetInsertPoint(LoopBB);
979  Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
980 
981  Value *NewVal = PerformOp(Builder, Loaded);
982 
983  Value *StoreSuccess =
984  TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
985  Value *TryAgain = Builder.CreateICmpNE(
986  StoreSuccess, ConstantInt::get(IntegerType::get(Ctx, 32), 0), "tryagain");
987  Builder.CreateCondBr(TryAgain, LoopBB, ExitBB);
988 
989  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
990  return Loaded;
991 }
992 
993 /// Convert an atomic cmpxchg of a non-integral type to an integer cmpxchg of
994 /// the equivalent bitwidth. We used to not support pointer cmpxchg in the
995 /// IR. As a migration step, we convert back to what used to be the standard
996 /// way to represent a pointer cmpxchg so that we can update backends one by
997 /// one.
998 AtomicCmpXchgInst *AtomicExpand::convertCmpXchgToIntegerType(AtomicCmpXchgInst *CI) {
999  auto *M = CI->getModule();
1000  Type *NewTy = getCorrespondingIntegerType(CI->getCompareOperand()->getType(),
1001  M->getDataLayout());
1002 
1003  IRBuilder<> Builder(CI);
1004 
1005  Value *Addr = CI->getPointerOperand();
1006  Type *PT = PointerType::get(NewTy,
1007  Addr->getType()->getPointerAddressSpace());
1008  Value *NewAddr = Builder.CreateBitCast(Addr, PT);
1009 
1010  Value *NewCmp = Builder.CreatePtrToInt(CI->getCompareOperand(), NewTy);
1011  Value *NewNewVal = Builder.CreatePtrToInt(CI->getNewValOperand(), NewTy);
1012 
1013 
1014  auto *NewCI = Builder.CreateAtomicCmpXchg(NewAddr, NewCmp, NewNewVal,
1015  CI->getSuccessOrdering(),
1016  CI->getFailureOrdering(),
1017  CI->getSyncScopeID());
1018  NewCI->setVolatile(CI->isVolatile());
1019  NewCI->setWeak(CI->isWeak());
1020  LLVM_DEBUG(dbgs() << "Replaced " << *CI << " with " << *NewCI << "\n");
1021 
1022  Value *OldVal = Builder.CreateExtractValue(NewCI, 0);
1023  Value *Succ = Builder.CreateExtractValue(NewCI, 1);
1024 
1025  OldVal = Builder.CreateIntToPtr(OldVal, CI->getCompareOperand()->getType());
1026 
1027  Value *Res = UndefValue::get(CI->getType());
1028  Res = Builder.CreateInsertValue(Res, OldVal, 0);
1029  Res = Builder.CreateInsertValue(Res, Succ, 1);
1030 
1031  CI->replaceAllUsesWith(Res);
1032  CI->eraseFromParent();
1033  return NewCI;
1034 }
1035 
1036 bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1037  AtomicOrdering SuccessOrder = CI->getSuccessOrdering();
1038  AtomicOrdering FailureOrder = CI->getFailureOrdering();
1039  Value *Addr = CI->getPointerOperand();
1040  BasicBlock *BB = CI->getParent();
1041  Function *F = BB->getParent();
1042  LLVMContext &Ctx = F->getContext();
1043  // If shouldInsertFencesForAtomic() returns true, then the target does not
1044  // want to deal with memory orders, and emitLeading/TrailingFence should take
1045  // care of everything. Otherwise, emitLeading/TrailingFence are no-op and we
1046  // should preserve the ordering.
1047  bool ShouldInsertFencesForAtomic = TLI->shouldInsertFencesForAtomic(CI);
1048  AtomicOrdering MemOpOrder =
1049  ShouldInsertFencesForAtomic ? AtomicOrdering::Monotonic : SuccessOrder;
1050 
1051  // In implementations which use a barrier to achieve release semantics, we can
1052  // delay emitting this barrier until we know a store is actually going to be
1053  // attempted. The cost of this delay is that we need 2 copies of the block
1054  // emitting the load-linked, affecting code size.
1055  //
1056  // Ideally, this logic would be unconditional except for the minsize check
1057  // since in other cases the extra blocks naturally collapse down to the
1058  // minimal loop. Unfortunately, this puts too much stress on later
1059  // optimisations so we avoid emitting the extra logic in those cases too.
1060  bool HasReleasedLoadBB = !CI->isWeak() && ShouldInsertFencesForAtomic &&
1061  SuccessOrder != AtomicOrdering::Monotonic &&
1062  SuccessOrder != AtomicOrdering::Acquire &&
1063  !F->optForMinSize();
1064 
1065  // There's no overhead for sinking the release barrier in a weak cmpxchg, so
1066  // do it even on minsize.
1067  bool UseUnconditionalReleaseBarrier = F->optForMinSize() && !CI->isWeak();
1068 
1069  // Given: cmpxchg some_op iN* %addr, iN %desired, iN %new success_ord fail_ord
1070  //
1071  // The full expansion we produce is:
1072  // [...]
1073  // cmpxchg.start:
1074  // %unreleasedload = @load.linked(%addr)
1075  // %should_store = icmp eq %unreleasedload, %desired
1076  // br i1 %should_store, label %cmpxchg.fencedstore,
1077  // label %cmpxchg.nostore
1078  // cmpxchg.releasingstore:
1079  // fence?
1080  // br label cmpxchg.trystore
1081  // cmpxchg.trystore:
1082  // %loaded.trystore = phi [%unreleasedload, %releasingstore],
1083  // [%releasedload, %cmpxchg.releasedload]
1084  // %stored = @store_conditional(%new, %addr)
1085  // %success = icmp eq i32 %stored, 0
1086  // br i1 %success, label %cmpxchg.success,
1087  // label %cmpxchg.releasedload/%cmpxchg.failure
1088  // cmpxchg.releasedload:
1089  // %releasedload = @load.linked(%addr)
1090  // %should_store = icmp eq %releasedload, %desired
1091  // br i1 %should_store, label %cmpxchg.trystore,
1092  // label %cmpxchg.failure
1093  // cmpxchg.success:
1094  // fence?
1095  // br label %cmpxchg.end
1096  // cmpxchg.nostore:
1097  // %loaded.nostore = phi [%unreleasedload, %cmpxchg.start],
1098  // [%releasedload,
1099  // %cmpxchg.releasedload/%cmpxchg.trystore]
1100  // @load_linked_fail_balance()?
1101  // br label %cmpxchg.failure
1102  // cmpxchg.failure:
1103  // fence?
1104  // br label %cmpxchg.end
1105  // cmpxchg.end:
1106  // %loaded = phi [%loaded.nostore, %cmpxchg.failure],
1107  // [%loaded.trystore, %cmpxchg.trystore]
1108  // %success = phi i1 [true, %cmpxchg.success], [false, %cmpxchg.failure]
1109  // %restmp = insertvalue { iN, i1 } undef, iN %loaded, 0
1110  // %res = insertvalue { iN, i1 } %restmp, i1 %success, 1
1111  // [...]
1112  BasicBlock *ExitBB = BB->splitBasicBlock(CI->getIterator(), "cmpxchg.end");
1113  auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
1114  auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
1115  auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
1116  auto ReleasedLoadBB =
1117  BasicBlock::Create(Ctx, "cmpxchg.releasedload", F, SuccessBB);
1118  auto TryStoreBB =
1119  BasicBlock::Create(Ctx, "cmpxchg.trystore", F, ReleasedLoadBB);
1120  auto ReleasingStoreBB =
1121  BasicBlock::Create(Ctx, "cmpxchg.fencedstore", F, TryStoreBB);
1122  auto StartBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, ReleasingStoreBB);
1123 
1124  // This grabs the DebugLoc from CI
1125  IRBuilder<> Builder(CI);
1126 
1127  // The split call above "helpfully" added a branch at the end of BB (to the
1128  // wrong place), but we might want a fence too. It's easiest to just remove
1129  // the branch entirely.
1130  std::prev(BB->end())->eraseFromParent();
1131  Builder.SetInsertPoint(BB);
1132  if (ShouldInsertFencesForAtomic && UseUnconditionalReleaseBarrier)
1133  TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1134  Builder.CreateBr(StartBB);
1135 
1136  // Start the main loop block now that we've taken care of the preliminaries.
1137  Builder.SetInsertPoint(StartBB);
1138  Value *UnreleasedLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
1139  Value *ShouldStore = Builder.CreateICmpEQ(
1140  UnreleasedLoad, CI->getCompareOperand(), "should_store");
1141 
1142  // If the cmpxchg doesn't actually need any ordering when it fails, we can
1143  // jump straight past that fence instruction (if it exists).
1144  Builder.CreateCondBr(ShouldStore, ReleasingStoreBB, NoStoreBB);
1145 
1146  Builder.SetInsertPoint(ReleasingStoreBB);
1147  if (ShouldInsertFencesForAtomic && !UseUnconditionalReleaseBarrier)
1148  TLI->emitLeadingFence(Builder, CI, SuccessOrder);
1149  Builder.CreateBr(TryStoreBB);
1150 
1151  Builder.SetInsertPoint(TryStoreBB);
1152  Value *StoreSuccess = TLI->emitStoreConditional(
1153  Builder, CI->getNewValOperand(), Addr, MemOpOrder);
1154  StoreSuccess = Builder.CreateICmpEQ(
1155  StoreSuccess, ConstantInt::get(Type::getInt32Ty(Ctx), 0), "success");
1156  BasicBlock *RetryBB = HasReleasedLoadBB ? ReleasedLoadBB : StartBB;
1157  Builder.CreateCondBr(StoreSuccess, SuccessBB,
1158  CI->isWeak() ? FailureBB : RetryBB);
1159 
1160  Builder.SetInsertPoint(ReleasedLoadBB);
1161  Value *SecondLoad;
1162  if (HasReleasedLoadBB) {
1163  SecondLoad = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
1164  ShouldStore = Builder.CreateICmpEQ(SecondLoad, CI->getCompareOperand(),
1165  "should_store");
1166 
1167  // If the cmpxchg doesn't actually need any ordering when it fails, we can
1168  // jump straight past that fence instruction (if it exists).
1169  Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
1170  } else
1171  Builder.CreateUnreachable();
1172 
1173  // Make sure later instructions don't get reordered with a fence if
1174  // necessary.
1175  Builder.SetInsertPoint(SuccessBB);
1176  if (ShouldInsertFencesForAtomic)
1177  TLI->emitTrailingFence(Builder, CI, SuccessOrder);
1178  Builder.CreateBr(ExitBB);
1179 
1180  Builder.SetInsertPoint(NoStoreBB);
1181  // In the failing case, where we don't execute the store-conditional, the
1182  // target might want to balance out the load-linked with a dedicated
1183  // instruction (e.g., on ARM, clearing the exclusive monitor).
1184  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
1185  Builder.CreateBr(FailureBB);
1186 
1187  Builder.SetInsertPoint(FailureBB);
1188  if (ShouldInsertFencesForAtomic)
1189  TLI->emitTrailingFence(Builder, CI, FailureOrder);
1190  Builder.CreateBr(ExitBB);
1191 
1192  // Finally, we have control-flow based knowledge of whether the cmpxchg
1193  // succeeded or not. We expose this to later passes by converting any
1194  // subsequent "icmp eq/ne %loaded, %oldval" into a use of an appropriate
1195  // PHI.
1196  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1197  PHINode *Success = Builder.CreatePHI(Type::getInt1Ty(Ctx), 2);
1198  Success->addIncoming(ConstantInt::getTrue(Ctx), SuccessBB);
1199  Success->addIncoming(ConstantInt::getFalse(Ctx), FailureBB);
1200 
1201  // Setup the builder so we can create any PHIs we need.
1202  Value *Loaded;
1203  if (!HasReleasedLoadBB)
1204  Loaded = UnreleasedLoad;
1205  else {
1206  Builder.SetInsertPoint(TryStoreBB, TryStoreBB->begin());
1207  PHINode *TryStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
1208  TryStoreLoaded->addIncoming(UnreleasedLoad, ReleasingStoreBB);
1209  TryStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
1210 
1211  Builder.SetInsertPoint(NoStoreBB, NoStoreBB->begin());
1212  PHINode *NoStoreLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
1213  NoStoreLoaded->addIncoming(UnreleasedLoad, StartBB);
1214  NoStoreLoaded->addIncoming(SecondLoad, ReleasedLoadBB);
1215 
1216  Builder.SetInsertPoint(ExitBB, ++ExitBB->begin());
1217  PHINode *ExitLoaded = Builder.CreatePHI(UnreleasedLoad->getType(), 2);
1218  ExitLoaded->addIncoming(TryStoreLoaded, SuccessBB);
1219  ExitLoaded->addIncoming(NoStoreLoaded, FailureBB);
1220 
1221  Loaded = ExitLoaded;
1222  }
1223 
1224  // Look for any users of the cmpxchg that are just comparing the loaded value
1225  // against the desired one, and replace them with the CFG-derived version.
1226  SmallVector<ExtractValueInst *, 2> PrunedInsts;
1227  for (auto User : CI->users()) {
1228  auto EV = dyn_cast<ExtractValueInst>(User);
1229  if (!EV)
1230  continue;
1231 
1232  assert(EV->getNumIndices() == 1 && EV->getIndices()[0] <= 1 &&
1233  "weird extraction from { iN, i1 }");
1234 
1235  if (EV->getIndices()[0] == 0)
1236  EV->replaceAllUsesWith(Loaded);
1237  else
1238  EV->replaceAllUsesWith(Success);
1239 
1240  PrunedInsts.push_back(EV);
1241  }
1242 
1243  // We can remove the instructions now we're no longer iterating through them.
1244  for (auto EV : PrunedInsts)
1245  EV->eraseFromParent();
1246 
1247  if (!CI->use_empty()) {
1248  // Some use of the full struct return that we don't understand has happened,
1249  // so we've got to reconstruct it properly.
1250  Value *Res;
1251  Res = Builder.CreateInsertValue(UndefValue::get(CI->getType()), Loaded, 0);
1252  Res = Builder.CreateInsertValue(Res, Success, 1);
1253 
1254  CI->replaceAllUsesWith(Res);
1255  }
1256 
1257  CI->eraseFromParent();
1258  return true;
1259 }
1260 
1261 bool AtomicExpand::isIdempotentRMW(AtomicRMWInst* RMWI) {
1262  auto C = dyn_cast<ConstantInt>(RMWI->getValOperand());
1263  if(!C)
1264  return false;
1265 
1266  AtomicRMWInst::BinOp Op = RMWI->getOperation();
1267  switch(Op) {
1268  case AtomicRMWInst::Add:
1269  case AtomicRMWInst::Sub:
1270  case AtomicRMWInst::Or:
1271  case AtomicRMWInst::Xor:
1272  return C->isZero();
1273  case AtomicRMWInst::And:
1274  return C->isMinusOne();
1275  // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
1276  default:
1277  return false;
1278  }
1279 }
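// Example (illustrative): "atomicrmw or i32* %p, i32 0" and
// "atomicrmw and i32* %p, i32 -1" are idempotent; if the target's
// lowerIdempotentRMWIntoFencedLoad hook handles them, simplifyIdempotentRMW
// below replaces the RMW with a suitably fenced atomic load.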
1280 
1281 bool AtomicExpand::simplifyIdempotentRMW(AtomicRMWInst* RMWI) {
1282  if (auto ResultingLoad = TLI->lowerIdempotentRMWIntoFencedLoad(RMWI)) {
1283  tryExpandAtomicLoad(ResultingLoad);
1284  return true;
1285  }
1286  return false;
1287 }
1288 
1289 Value *AtomicExpand::insertRMWCmpXchgLoop(
1290  IRBuilder<> &Builder, Type *ResultTy, Value *Addr,
1291  AtomicOrdering MemOpOrder,
1292  function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
1293  CreateCmpXchgInstFun CreateCmpXchg) {
1294  LLVMContext &Ctx = Builder.getContext();
1295  BasicBlock *BB = Builder.GetInsertBlock();
1296  Function *F = BB->getParent();
1297 
1298  // Given: atomicrmw some_op iN* %addr, iN %incr ordering
1299  //
1300  // The standard expansion we produce is:
1301  // [...]
1302  // %init_loaded = load atomic iN* %addr
1303  // br label %loop
1304  // loop:
1305  // %loaded = phi iN [ %init_loaded, %entry ], [ %new_loaded, %loop ]
1306  // %new = some_op iN %loaded, %incr
1307  // %pair = cmpxchg iN* %addr, iN %loaded, iN %new
1308  // %new_loaded = extractvalue { iN, i1 } %pair, 0
1309  // %success = extractvalue { iN, i1 } %pair, 1
1310  // br i1 %success, label %atomicrmw.end, label %loop
1311  // atomicrmw.end:
1312  // [...]
1313  BasicBlock *ExitBB =
1314  BB->splitBasicBlock(Builder.GetInsertPoint(), "atomicrmw.end");
1315  BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
1316 
1317  // The split call above "helpfully" added a branch at the end of BB (to the
1318  // wrong place), but we want a load. It's easiest to just remove
1319  // the branch entirely.
1320  std::prev(BB->end())->eraseFromParent();
1321  Builder.SetInsertPoint(BB);
1322  LoadInst *InitLoaded = Builder.CreateLoad(ResultTy, Addr);
1323  // Atomics require at least natural alignment.
1324  InitLoaded->setAlignment(ResultTy->getPrimitiveSizeInBits() / 8);
1325  Builder.CreateBr(LoopBB);
1326 
1327  // Start the main loop block now that we've taken care of the preliminaries.
1328  Builder.SetInsertPoint(LoopBB);
1329  PHINode *Loaded = Builder.CreatePHI(ResultTy, 2, "loaded");
1330  Loaded->addIncoming(InitLoaded, BB);
1331 
1332  Value *NewVal = PerformOp(Builder, Loaded);
1333 
1334  Value *NewLoaded = nullptr;
1335  Value *Success = nullptr;
1336 
1337  CreateCmpXchg(Builder, Addr, Loaded, NewVal,
1338  MemOpOrder == AtomicOrdering::Unordered
1339  ? AtomicOrdering::Monotonic
1340  : MemOpOrder,
1341  Success, NewLoaded);
1342  assert(Success && NewLoaded);
1343 
1344  Loaded->addIncoming(NewLoaded, LoopBB);
1345 
1346  Builder.CreateCondBr(Success, ExitBB, LoopBB);
1347 
1348  Builder.SetInsertPoint(ExitBB, ExitBB->begin());
1349  return NewLoaded;
1350 }
1351 
1352 bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
1353  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
1354  unsigned ValueSize = getAtomicOpSize(CI);
1355 
1356  switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
1357  default:
1358  llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
1359  case TargetLoweringBase::AtomicExpansionKind::None:
1360  if (ValueSize < MinCASSize)
1361  expandPartwordCmpXchg(CI);
1362  return false;
1363  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
1364  assert(ValueSize >= MinCASSize &&
1365  "MinCmpXchgSizeInBits not yet supported for LL/SC expansions.");
1366  return expandAtomicCmpXchg(CI);
1367  }
1368  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
1369  llvm_unreachable(
1370  "MaskedIntrinsic expansion of cmpxchg not yet implemented");
1371  }
1372 }
1373 
1374 // Note: This function is exposed externally by AtomicExpandUtils.h
1375 bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
1376  CreateCmpXchgInstFun CreateCmpXchg) {
1377  IRBuilder<> Builder(AI);
1378  Value *Loaded = AtomicExpand::insertRMWCmpXchgLoop(
1379  Builder, AI->getType(), AI->getPointerOperand(), AI->getOrdering(),
1380  [&](IRBuilder<> &Builder, Value *Loaded) {
1381  return performAtomicOp(AI->getOperation(), Builder, Loaded,
1382  AI->getValOperand());
1383  },
1384  CreateCmpXchg);
1385 
1386  AI->replaceAllUsesWith(Loaded);
1387  AI->eraseFromParent();
1388  return true;
1389 }
1390 
1391 // In order to use one of the sized library calls such as
1392 // __atomic_fetch_add_4, the alignment must be sufficient, the size
1393 // must be one of the potentially-specialized sizes, and the value
1394 // type must actually exist in C on the target (otherwise, the
1395 // function wouldn't actually be defined.)
1396 static bool canUseSizedAtomicCall(unsigned Size, unsigned Align,
1397  const DataLayout &DL) {
1398  // TODO: "LargestSize" is an approximation for "largest type that
1399  // you can express in C". It seems to be the case that int128 is
1400  // supported on all 64-bit platforms, otherwise only up to 64-bit
1401  // integers are supported. If we get this wrong, then we'll try to
1402  // call a sized libcall that doesn't actually exist. There should
1403  // really be some more reliable way in LLVM of determining integer
1404  // sizes which are valid in the target's C ABI...
1405  unsigned LargestSize = DL.getLargestLegalIntTypeSizeInBits() >= 64 ? 16 : 8;
1406  return Align >= Size &&
1407  (Size == 1 || Size == 2 || Size == 4 || Size == 8 || Size == 16) &&
1408  Size <= LargestSize;
1409 }
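// Example (assuming a 64-bit target where getLargestLegalIntTypeSizeInBits()
// >= 64): Size = 8 with Align = 8 permits a sized call such as
// __atomic_fetch_add_8, whereas Size = 8 with Align = 4 fails this check and
// falls back to the generic, pointer-based __atomic_* form.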
1410 
1411 void AtomicExpand::expandAtomicLoadToLibcall(LoadInst *I) {
1412  static const RTLIB::Libcall Libcalls[6] = {
1413  RTLIB::ATOMIC_LOAD, RTLIB::ATOMIC_LOAD_1, RTLIB::ATOMIC_LOAD_2,
1414  RTLIB::ATOMIC_LOAD_4, RTLIB::ATOMIC_LOAD_8, RTLIB::ATOMIC_LOAD_16};
1415  unsigned Size = getAtomicOpSize(I);
1416  unsigned Align = getAtomicOpAlign(I);
1417 
1418  bool expanded = expandAtomicOpToLibcall(
1419  I, Size, Align, I->getPointerOperand(), nullptr, nullptr,
1420  I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1421  (void)expanded;
1422  assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Load");
1423 }
1424 
1425 void AtomicExpand::expandAtomicStoreToLibcall(StoreInst *I) {
1426  static const RTLIB::Libcall Libcalls[6] = {
1427  RTLIB::ATOMIC_STORE, RTLIB::ATOMIC_STORE_1, RTLIB::ATOMIC_STORE_2,
1428  RTLIB::ATOMIC_STORE_4, RTLIB::ATOMIC_STORE_8, RTLIB::ATOMIC_STORE_16};
1429  unsigned Size = getAtomicOpSize(I);
1430  unsigned Align = getAtomicOpAlign(I);
1431 
1432  bool expanded = expandAtomicOpToLibcall(
1433  I, Size, Align, I->getPointerOperand(), I->getValueOperand(), nullptr,
1434  I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1435  (void)expanded;
1436  assert(expanded && "expandAtomicOpToLibcall shouldn't fail for Store");
1437 }
1438 
1439 void AtomicExpand::expandAtomicCASToLibcall(AtomicCmpXchgInst *I) {
1440  static const RTLIB::Libcall Libcalls[6] = {
1441  RTLIB::ATOMIC_COMPARE_EXCHANGE, RTLIB::ATOMIC_COMPARE_EXCHANGE_1,
1442  RTLIB::ATOMIC_COMPARE_EXCHANGE_2, RTLIB::ATOMIC_COMPARE_EXCHANGE_4,
1443  RTLIB::ATOMIC_COMPARE_EXCHANGE_8, RTLIB::ATOMIC_COMPARE_EXCHANGE_16};
1444  unsigned Size = getAtomicOpSize(I);
1445  unsigned Align = getAtomicOpAlign(I);
1446 
1447  bool expanded = expandAtomicOpToLibcall(
1448  I, Size, Align, I->getPointerOperand(), I->getNewValOperand(),
1449  I->getCompareOperand(), I->getSuccessOrdering(), I->getFailureOrdering(),
1450  Libcalls);
1451  (void)expanded;
1452  assert(expanded && "expandAtomicOpToLibcall shouldn't fail for CAS");
1453 }
1454 
1455 static ArrayRef<RTLIB::Libcall> GetRMWLibcall(AtomicRMWInst::BinOp Op) {
1456  static const RTLIB::Libcall LibcallsXchg[6] = {
1457  RTLIB::ATOMIC_EXCHANGE, RTLIB::ATOMIC_EXCHANGE_1,
1458  RTLIB::ATOMIC_EXCHANGE_2, RTLIB::ATOMIC_EXCHANGE_4,
1459  RTLIB::ATOMIC_EXCHANGE_8, RTLIB::ATOMIC_EXCHANGE_16};
1460  static const RTLIB::Libcall LibcallsAdd[6] = {
1461  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_ADD_1,
1462  RTLIB::ATOMIC_FETCH_ADD_2, RTLIB::ATOMIC_FETCH_ADD_4,
1463  RTLIB::ATOMIC_FETCH_ADD_8, RTLIB::ATOMIC_FETCH_ADD_16};
1464  static const RTLIB::Libcall LibcallsSub[6] = {
1465  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_SUB_1,
1466  RTLIB::ATOMIC_FETCH_SUB_2, RTLIB::ATOMIC_FETCH_SUB_4,
1467  RTLIB::ATOMIC_FETCH_SUB_8, RTLIB::ATOMIC_FETCH_SUB_16};
1468  static const RTLIB::Libcall LibcallsAnd[6] = {
1469  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_AND_1,
1470  RTLIB::ATOMIC_FETCH_AND_2, RTLIB::ATOMIC_FETCH_AND_4,
1471  RTLIB::ATOMIC_FETCH_AND_8, RTLIB::ATOMIC_FETCH_AND_16};
1472  static const RTLIB::Libcall LibcallsOr[6] = {
1473  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_OR_1,
1474  RTLIB::ATOMIC_FETCH_OR_2, RTLIB::ATOMIC_FETCH_OR_4,
1475  RTLIB::ATOMIC_FETCH_OR_8, RTLIB::ATOMIC_FETCH_OR_16};
1476  static const RTLIB::Libcall LibcallsXor[6] = {
1477  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_XOR_1,
1478  RTLIB::ATOMIC_FETCH_XOR_2, RTLIB::ATOMIC_FETCH_XOR_4,
1479  RTLIB::ATOMIC_FETCH_XOR_8, RTLIB::ATOMIC_FETCH_XOR_16};
1480  static const RTLIB::Libcall LibcallsNand[6] = {
1481  RTLIB::UNKNOWN_LIBCALL, RTLIB::ATOMIC_FETCH_NAND_1,
1482  RTLIB::ATOMIC_FETCH_NAND_2, RTLIB::ATOMIC_FETCH_NAND_4,
1483  RTLIB::ATOMIC_FETCH_NAND_8, RTLIB::ATOMIC_FETCH_NAND_16};
1484 
1485  switch (Op) {
1486  case AtomicRMWInst::BAD_BINOP:
1487  llvm_unreachable("Should not have BAD_BINOP.");
1488  case AtomicRMWInst::Xchg:
1489  return makeArrayRef(LibcallsXchg);
1490  case AtomicRMWInst::Add:
1491  return makeArrayRef(LibcallsAdd);
1492  case AtomicRMWInst::Sub:
1493  return makeArrayRef(LibcallsSub);
1494  case AtomicRMWInst::And:
1495  return makeArrayRef(LibcallsAnd);
1496  case AtomicRMWInst::Or:
1497  return makeArrayRef(LibcallsOr);
1498  case AtomicRMWInst::Xor:
1499  return makeArrayRef(LibcallsXor);
1500  case AtomicRMWInst::Nand:
1501  return makeArrayRef(LibcallsNand);
1502  case AtomicRMWInst::Max:
1503  case AtomicRMWInst::Min:
1504  case AtomicRMWInst::UMax:
1505  case AtomicRMWInst::UMin:
1506  // No atomic libcalls are available for max/min/umax/umin.
1507  return {};
1508  }
1509  llvm_unreachable("Unexpected AtomicRMW operation.");
1510 }
1511 
1512 void AtomicExpand::expandAtomicRMWToLibcall(AtomicRMWInst *I) {
1513  ArrayRef<RTLIB::Libcall> Libcalls = GetRMWLibcall(I->getOperation());
1514 
1515  unsigned Size = getAtomicOpSize(I);
1516  unsigned Align = getAtomicOpAlign(I);
1517 
1518  bool Success = false;
1519  if (!Libcalls.empty())
1520  Success = expandAtomicOpToLibcall(
1521  I, Size, Align, I->getPointerOperand(), I->getValOperand(), nullptr,
1522  I->getOrdering(), AtomicOrdering::NotAtomic, Libcalls);
1523 
1524  // The expansion failed: either there were no libcalls at all for
1525  // the operation (min/max), or there were only size-specialized
1526  // libcalls (add/sub/etc) and we needed a generic. So, expand to a
1527  // CAS libcall, via a CAS loop, instead.
1528  if (!Success) {
1529  expandAtomicRMWToCmpXchg(I, [this](IRBuilder<> &Builder, Value *Addr,
1530  Value *Loaded, Value *NewVal,
1531  AtomicOrdering MemOpOrder,
1532  Value *&Success, Value *&NewLoaded) {
1533  // Create the CAS instruction normally...
1534  AtomicCmpXchgInst *Pair = Builder.CreateAtomicCmpXchg(
1535  Addr, Loaded, NewVal, MemOpOrder,
1536  AtomicCmpXchgInst::getStrongestFailureOrdering(MemOpOrder));
1537  Success = Builder.CreateExtractValue(Pair, 1, "success");
1538  NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
1539 
1540  // ...and then expand the CAS into a libcall.
1541  expandAtomicCASToLibcall(Pair);
1542  });
1543  }
1544 }
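// For example (a sketch of the control flow, not literal output): an
// "atomicrmw max" has no fetch-style libcall, so GetRMWLibcall returns an
// empty list, expandAtomicRMWToCmpXchg rewrites the operation as a
// compare-exchange loop, and the cmpxchg created by the lambda above is in
// turn lowered to an __atomic_compare_exchange[_N] call by
// expandAtomicCASToLibcall.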
1545 
1546 // A helper routine for the above expandAtomic*ToLibcall functions.
1547 //
1548 // 'Libcalls' contains an array of enum values for the particular
1549 // ATOMIC libcalls to be emitted. All of the other arguments besides
1550 // 'I' are extracted from the Instruction subclass by the
1551 // caller. Depending on the particular call, some will be null.
1552 bool AtomicExpand::expandAtomicOpToLibcall(
1553  Instruction *I, unsigned Size, unsigned Align, Value *PointerOperand,
1554  Value *ValueOperand, Value *CASExpected, AtomicOrdering Ordering,
1555  AtomicOrdering Ordering2, ArrayRef<RTLIB::Libcall> Libcalls) {
1556  assert(Libcalls.size() == 6);
1557 
1558  LLVMContext &Ctx = I->getContext();
1559  Module *M = I->getModule();
1560  const DataLayout &DL = M->getDataLayout();
1561  IRBuilder<> Builder(I);
1562  IRBuilder<> AllocaBuilder(&I->getFunction()->getEntryBlock().front());
1563 
1564  bool UseSizedLibcall = canUseSizedAtomicCall(Size, Align, DL);
1565  Type *SizedIntTy = Type::getIntNTy(Ctx, Size * 8);
1566 
1567  unsigned AllocaAlignment = DL.getPrefTypeAlignment(SizedIntTy);
1568 
1569  // TODO: the "order" argument type is "int", not int32. So
1570  // getInt32Ty may be wrong if the arch uses e.g. 16-bit ints.
1571  ConstantInt *SizeVal64 = ConstantInt::get(Type::getInt64Ty(Ctx), Size);
1572  assert(Ordering != AtomicOrdering::NotAtomic && "expect atomic MO");
1573  Constant *OrderingVal =
1574  ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering));
1575  Constant *Ordering2Val = nullptr;
1576  if (CASExpected) {
1577  assert(Ordering2 != AtomicOrdering::NotAtomic && "expect atomic MO");
1578  Ordering2Val =
1579  ConstantInt::get(Type::getInt32Ty(Ctx), (int)toCABI(Ordering2));
1580  }
1581  bool HasResult = I->getType() != Type::getVoidTy(Ctx);
1582 
1583  RTLIB::Libcall RTLibType;
1584  if (UseSizedLibcall) {
1585  switch (Size) {
1586  case 1: RTLibType = Libcalls[1]; break;
1587  case 2: RTLibType = Libcalls[2]; break;
1588  case 4: RTLibType = Libcalls[3]; break;
1589  case 8: RTLibType = Libcalls[4]; break;
1590  case 16: RTLibType = Libcalls[5]; break;
1591  }
1592  } else if (Libcalls[0] != RTLIB::UNKNOWN_LIBCALL) {
1593  RTLibType = Libcalls[0];
1594  } else {
1595  // Can't use sized function, and there's no generic for this
1596  // operation, so give up.
1597  return false;
1598  }
1599 
1600  // Build up the function call. There are two kinds. First, the sized
1601  // variants. These calls are going to be one of the following (with
1602  // N=1,2,4,8,16):
1603  // iN __atomic_load_N(iN *ptr, int ordering)
1604  // void __atomic_store_N(iN *ptr, iN val, int ordering)
1605  // iN __atomic_{exchange|fetch_*}_N(iN *ptr, iN val, int ordering)
1606  // bool __atomic_compare_exchange_N(iN *ptr, iN *expected, iN desired,
1607  // int success_order, int failure_order)
1608  //
1609  // Note that these functions can be used for non-integer atomic
1610  // operations, the values just need to be bitcast to integers on the
1611  // way in and out.
1612  //
1613  // And, then, the generic variants. They look like the following:
1614  // void __atomic_load(size_t size, void *ptr, void *ret, int ordering)
1615  // void __atomic_store(size_t size, void *ptr, void *val, int ordering)
1616  // void __atomic_exchange(size_t size, void *ptr, void *val, void *ret,
1617  // int ordering)
1618  // bool __atomic_compare_exchange(size_t size, void *ptr, void *expected,
1619  // void *desired, int success_order,
1620  // int failure_order)
1621  //
1622  // The different signatures are built up depending on the
1623  // 'UseSizedLibcall', 'CASExpected', 'ValueOperand', and 'HasResult'
1624  // variables.
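// As an illustrative example (names are placeholders, size_t shown as i64
// for a 64-bit target): lowering
//   %old = atomicrmw xchg i32* %p, i32 %v seq_cst
// with a usable sized libcall becomes approximately
//   %old = call i32 @__atomic_exchange_4(i8* %p.i8, i32 %v, i32 5)
// where 5 is the C ABI encoding of seq_cst, whereas without a sized variant
// %v is spilled to a stack temporary and the generic form is used:
//   call void @__atomic_exchange(i64 4, i8* %p.i8, i8* %val.i8, i8* %ret.i8, i32 5)
// followed by a load of the result from the 'ret' temporary.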
1625 
1626  AllocaInst *AllocaCASExpected = nullptr;
1627  Value *AllocaCASExpected_i8 = nullptr;
1628  AllocaInst *AllocaValue = nullptr;
1629  Value *AllocaValue_i8 = nullptr;
1630  AllocaInst *AllocaResult = nullptr;
1631  Value *AllocaResult_i8 = nullptr;
1632 
1633  Type *ResultTy;
1634  SmallVector<Value *, 6> Args;
1635  AttributeList Attr;
1636 
1637  // 'size' argument.
1638  if (!UseSizedLibcall) {
1639  // Note, getIntPtrType is assumed equivalent to size_t.
1640  Args.push_back(ConstantInt::get(DL.getIntPtrType(Ctx), Size));
1641  }
1642 
1643  // 'ptr' argument.
1644  Value *PtrVal =
1645  Builder.CreateBitCast(PointerOperand, Type::getInt8PtrTy(Ctx));
1646  Args.push_back(PtrVal);
1647 
1648  // 'expected' argument, if present.
1649  if (CASExpected) {
1650  AllocaCASExpected = AllocaBuilder.CreateAlloca(CASExpected->getType());
1651  AllocaCASExpected->setAlignment(AllocaAlignment);
1652  AllocaCASExpected_i8 =
1653  Builder.CreateBitCast(AllocaCASExpected, Type::getInt8PtrTy(Ctx));
1654  Builder.CreateLifetimeStart(AllocaCASExpected_i8, SizeVal64);
1655  Builder.CreateAlignedStore(CASExpected, AllocaCASExpected, AllocaAlignment);
1656  Args.push_back(AllocaCASExpected_i8);
1657  }
1658 
1659  // 'val' argument ('desired' for cas), if present.
1660  if (ValueOperand) {
1661  if (UseSizedLibcall) {
1662  Value *IntValue =
1663  Builder.CreateBitOrPointerCast(ValueOperand, SizedIntTy);
1664  Args.push_back(IntValue);
1665  } else {
1666  AllocaValue = AllocaBuilder.CreateAlloca(ValueOperand->getType());
1667  AllocaValue->setAlignment(AllocaAlignment);
1668  AllocaValue_i8 =
1669  Builder.CreateBitCast(AllocaValue, Type::getInt8PtrTy(Ctx));
1670  Builder.CreateLifetimeStart(AllocaValue_i8, SizeVal64);
1671  Builder.CreateAlignedStore(ValueOperand, AllocaValue, AllocaAlignment);
1672  Args.push_back(AllocaValue_i8);
1673  }
1674  }
1675 
1676  // 'ret' argument.
1677  if (!CASExpected && HasResult && !UseSizedLibcall) {
1678  AllocaResult = AllocaBuilder.CreateAlloca(I->getType());
1679  AllocaResult->setAlignment(AllocaAlignment);
1680  AllocaResult_i8 =
1681  Builder.CreateBitCast(AllocaResult, Type::getInt8PtrTy(Ctx));
1682  Builder.CreateLifetimeStart(AllocaResult_i8, SizeVal64);
1683  Args.push_back(AllocaResult_i8);
1684  }
1685 
1686  // 'ordering' ('success_order' for cas) argument.
1687  Args.push_back(OrderingVal);
1688 
1689  // 'failure_order' argument, if present.
1690  if (Ordering2Val)
1691  Args.push_back(Ordering2Val);
1692 
1693  // Now, the return type.
1694  if (CASExpected) {
1695  ResultTy = Type::getInt1Ty(Ctx);
1696  Attr = Attr.addAttribute(Ctx, AttributeList::ReturnIndex, Attribute::ZExt);
1697  } else if (HasResult && UseSizedLibcall)
1698  ResultTy = SizedIntTy;
1699  else
1700  ResultTy = Type::getVoidTy(Ctx);
1701 
1702  // Done with setting up arguments and return types, create the call:
1703  SmallVector<Type *, 6> ArgTys;
1704  for (Value *Arg : Args)
1705  ArgTys.push_back(Arg->getType());
1706  FunctionType *FnType = FunctionType::get(ResultTy, ArgTys, false);
1707  Constant *LibcallFn =
1708  M->getOrInsertFunction(TLI->getLibcallName(RTLibType), FnType, Attr);
1709  CallInst *Call = Builder.CreateCall(LibcallFn, Args);
1710  Call->setAttributes(Attr);
1711  Value *Result = Call;
1712 
1713  // And then, extract the results...
1714  if (ValueOperand && !UseSizedLibcall)
1715  Builder.CreateLifetimeEnd(AllocaValue_i8, SizeVal64);
1716 
1717  if (CASExpected) {
1718  // The final result from the CAS is {load of 'expected' alloca, bool result
1719  // from call}
1720  Type *FinalResultTy = I->getType();
1721  Value *V = UndefValue::get(FinalResultTy);
1722  Value *ExpectedOut =
1723  Builder.CreateAlignedLoad(AllocaCASExpected, AllocaAlignment);
1724  Builder.CreateLifetimeEnd(AllocaCASExpected_i8, SizeVal64);
1725  V = Builder.CreateInsertValue(V, ExpectedOut, 0);
1726  V = Builder.CreateInsertValue(V, Result, 1);
1727  I->replaceAllUsesWith(V);
1728  } else if (HasResult) {
1729  Value *V;
1730  if (UseSizedLibcall)
1731  V = Builder.CreateBitOrPointerCast(Result, I->getType());
1732  else {
1733  V = Builder.CreateAlignedLoad(AllocaResult, AllocaAlignment);
1734  Builder.CreateLifetimeEnd(AllocaResult_i8, SizeVal64);
1735  }
1736  I->replaceAllUsesWith(V);
1737  }
1738  I->eraseFromParent();
1739  return true;
1740 }